/* * Copyright (C) 2013 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_ #define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_ #include <stdint.h> #include <stdlib.h> #include <sys/mman.h> #include <memory> #include <set> #include <string> #include <unordered_set> #include <vector> #include "base/allocator.h" #include "base/bit_utils.h" #include "base/mutex.h" #include "base/logging.h" #include "globals.h" #include "thread.h" namespace art { class MemMap; namespace gc { namespace allocator { // A runs-of-slots memory allocator. class RosAlloc { private: // Represents a run of free pages. class FreePageRun { public: uint8_t magic_num_; // The magic number used for debugging only. bool IsFree() const { return !kIsDebugBuild || magic_num_ == kMagicNumFree; } size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) { const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this); size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base); size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx]; DCHECK_GE(byte_size, static_cast<size_t>(0)); DCHECK_ALIGNED(byte_size, kPageSize); return byte_size; } void SetByteSize(RosAlloc* rosalloc, size_t byte_size) REQUIRES(rosalloc->lock_) { DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0)); uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this); size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base); rosalloc->free_page_run_size_map_[pm_idx] = byte_size; } void* Begin() { return reinterpret_cast<void*>(this); } void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) { uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this); uint8_t* end = fpr_base + ByteSize(rosalloc); return end; } bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) { return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_; } bool IsAtEndOfSpace(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) { return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) == rosalloc->base_ + rosalloc->footprint_; } bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) { switch (rosalloc->page_release_mode_) { case kPageReleaseModeNone: return false; case kPageReleaseModeEnd: return IsAtEndOfSpace(rosalloc); case kPageReleaseModeSize: return IsLargerThanPageReleaseThreshold(rosalloc); case kPageReleaseModeSizeAndEnd: return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc); case kPageReleaseModeAll: return true; default: LOG(FATAL) << "Unexpected page release mode "; return false; } } void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) { uint8_t* start = reinterpret_cast<uint8_t*>(this); size_t byte_size = ByteSize(rosalloc); DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0)); if (ShouldReleasePages(rosalloc)) { rosalloc->ReleasePageRange(start, start + byte_size); } } private: DISALLOW_COPY_AND_ASSIGN(FreePageRun); }; // The slot header. class Slot { public: Slot* Next() const { return next_; } void SetNext(Slot* next) { next_ = next; } // The slot right before this slot in terms of the address. Slot* Left(size_t bracket_size) { return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size); } void Clear() { next_ = nullptr; } private: Slot* next_; // Next slot in the list. friend class RosAlloc; }; // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to // traverse the list from the head to the tail when merging free lists. // We don't use the tail (kUseTail == false) for the free list to avoid the need to manage the // tail in the allocation fast path for a performance reason. template<bool kUseTail = true> class SlotFreeList { public: SlotFreeList() : head_(0U), tail_(0), size_(0) {} Slot* Head() const { return reinterpret_cast<Slot*>(head_); } Slot* Tail() const { CHECK(kUseTail); return reinterpret_cast<Slot*>(tail_); } size_t Size() const { return size_; } // Removes from the head of the free list. Slot* Remove() { Slot* slot; if (kIsDebugBuild) { Verify(); } Slot** headp = reinterpret_cast<Slot**>(&head_); Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr; Slot* old_head = *headp; if (old_head == nullptr) { // List was empty. if (kUseTail) { DCHECK(*tailp == nullptr); } return nullptr; } else { // List wasn't empty. if (kUseTail) { DCHECK(*tailp != nullptr); } Slot* old_head_next = old_head->Next(); slot = old_head; *headp = old_head_next; if (kUseTail && old_head_next == nullptr) { // List becomes empty. *tailp = nullptr; } } slot->Clear(); --size_; if (kIsDebugBuild) { Verify(); } return slot; } void Add(Slot* slot) { if (kIsDebugBuild) { Verify(); } DCHECK(slot != nullptr); DCHECK(slot->Next() == nullptr); Slot** headp = reinterpret_cast<Slot**>(&head_); Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr; Slot* old_head = *headp; if (old_head == nullptr) { // List was empty. if (kUseTail) { DCHECK(*tailp == nullptr); } *headp = slot; if (kUseTail) { *tailp = slot; } } else { // List wasn't empty. if (kUseTail) { DCHECK(*tailp != nullptr); } *headp = slot; slot->SetNext(old_head); } ++size_; if (kIsDebugBuild) { Verify(); } } // Merge the given list into this list. Empty the given list. // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2) // supporting the kUseTail == false parameter would require a O(n) linked list traversal to do // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid. void Merge(SlotFreeList<true>* list) { if (kIsDebugBuild) { Verify(); CHECK(list != nullptr); list->Verify(); } if (list->Size() == 0) { return; } Slot** headp = reinterpret_cast<Slot**>(&head_); Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr; Slot* old_head = *headp; if (old_head == nullptr) { // List was empty. *headp = list->Head(); if (kUseTail) { *tailp = list->Tail(); } size_ = list->Size(); } else { // List wasn't empty. DCHECK(list->Head() != nullptr); *headp = list->Head(); DCHECK(list->Tail() != nullptr); list->Tail()->SetNext(old_head); // if kUseTail, no change to tailp. size_ += list->Size(); } list->Reset(); if (kIsDebugBuild) { Verify(); } } void Reset() { head_ = 0; if (kUseTail) { tail_ = 0; } size_ = 0; } void Verify() { Slot* head = reinterpret_cast<Slot*>(head_); Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr; if (size_ == 0) { CHECK(head == nullptr); if (kUseTail) { CHECK(tail == nullptr); } } else { CHECK(head != nullptr); if (kUseTail) { CHECK(tail != nullptr); } size_t count = 0; for (Slot* slot = head; slot != nullptr; slot = slot->Next()) { ++count; if (kUseTail && slot->Next() == nullptr) { CHECK_EQ(slot, tail); } } CHECK_EQ(size_, count); } } private: // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for 1) // uniformity, 2) we won't need to change this code if we move to a non-low 4G heap in the // future, and 3) the space savings by using 32 bit fields in 32 bit would be lost in noise // (won't open up enough space to cause an extra slot to be available). uint64_t head_; // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists. // Unused if kUseTail is false. uint64_t tail_; // The number of slots in the list. This is used to make it fast to check if a free list is all // free without traversing the whole free list. uint32_t size_; uint32_t padding_ ATTRIBUTE_UNUSED; friend class RosAlloc; }; // Represents a run of memory slots of the same size. // // A run's memory layout: // // +-------------------+ // | magic_num | // +-------------------+ // | size_bracket_idx | // +-------------------+ // | is_thread_local | // +-------------------+ // | to_be_bulk_freed | // +-------------------+ // | | // | free list | // | | // +-------------------+ // | | // | bulk free list | // | | // +-------------------+ // | | // | thread-local free | // | list | // | | // +-------------------+ // | padding due to | // | alignment | // +-------------------+ // | slot 0 | // +-------------------+ // | slot 1 | // +-------------------+ // | slot 2 | // +-------------------+ // ... // +-------------------+ // | last slot | // +-------------------+ // class Run { public: uint8_t magic_num_; // The magic number used for debugging. uint8_t size_bracket_idx_; // The index of the size bracket of this run. uint8_t is_thread_local_; // True if this run is used as a thread-local run. uint8_t to_be_bulk_freed_; // Used within BulkFree() to flag a run that's involved with a bulk free. uint32_t padding_ ATTRIBUTE_UNUSED; // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail. SlotFreeList<false> free_list_; SlotFreeList<true> bulk_free_list_; SlotFreeList<true> thread_local_free_list_; // Padding due to alignment // Slot 0 // Slot 1 // ... // Returns the byte size of the header. static size_t fixed_header_size() { return sizeof(Run); } Slot* FirstSlot() const { const uint8_t idx = size_bracket_idx_; return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]); } Slot* LastSlot() { const uint8_t idx = size_bracket_idx_; const size_t bracket_size = bracketSizes[idx]; uintptr_t end = reinterpret_cast<uintptr_t>(End()); Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size); DCHECK_LE(FirstSlot(), last_slot); return last_slot; } SlotFreeList<false>* FreeList() { return &free_list_; } SlotFreeList<true>* BulkFreeList() { return &bulk_free_list_; } SlotFreeList<true>* ThreadLocalFreeList() { return &thread_local_free_list_; } void* End() { return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_]; } void SetIsThreadLocal(bool is_thread_local) { is_thread_local_ = is_thread_local ? 1 : 0; } bool IsThreadLocal() const { return is_thread_local_ != 0; } // Set up the free list for a new/empty run. void InitFreeList() { const uint8_t idx = size_bracket_idx_; const size_t bracket_size = bracketSizes[idx]; Slot* first_slot = FirstSlot(); // Add backwards so the first slot is at the head of the list. for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) { free_list_.Add(slot); } } // Merge the thread local free list to the free list. Used when a thread-local run becomes // full. bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out); // Merge the bulk free list to the free list. Used in a bulk free. void MergeBulkFreeListToFreeList(); // Merge the bulk free list to the thread local free list. In a bulk free, as a two-step // process, GC will first record all the slots to free in a run in the bulk free list where it // can write without a lock, and later acquire a lock once per run to merge the bulk free list // to the thread-local free list. void MergeBulkFreeListToThreadLocalFreeList(); // Allocates a slot in a run. ALWAYS_INLINE void* AllocSlot(); // Frees a slot in a run. This is used in a non-bulk free. void FreeSlot(void* ptr); // Add the given slot to the bulk free list. Returns the bracket size. size_t AddToBulkFreeList(void* ptr); // Add the given slot to the thread-local free list. void AddToThreadLocalFreeList(void* ptr); // Returns true if all the slots in the run are not in use. bool IsAllFree() const { return free_list_.Size() == numOfSlots[size_bracket_idx_]; } // Returns the number of free slots. size_t NumberOfFreeSlots() { return free_list_.Size(); } // Returns true if all the slots in the run are in use. ALWAYS_INLINE bool IsFull(); // Returns true if the bulk free list is empty. bool IsBulkFreeListEmpty() const { return bulk_free_list_.Size() == 0; } // Returns true if the thread local free list is empty. bool IsThreadLocalFreeListEmpty() const { return thread_local_free_list_.Size() == 0; } // Zero the run's data. void ZeroData(); // Zero the run's header and the slot headers. void ZeroHeaderAndSlotHeaders(); // Iterate over all the slots and apply the given function. void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg); // Dump the run metadata for debugging. std::string Dump(); // Verify for debugging. void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool) REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::thread_list_lock_); private: // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket // size. size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name); // Turns a FreeList into a string for debugging. template<bool kUseTail> std::string FreeListToStr(SlotFreeList<kUseTail>* free_list); // Check a given pointer is a valid slot address and return it as Slot*. Slot* ToSlot(void* ptr) { const uint8_t idx = size_bracket_idx_; const size_t bracket_size = bracketSizes[idx]; const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr) - reinterpret_cast<uint8_t*>(FirstSlot()); DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0)); size_t slot_idx = offset_from_slot_base / bracket_size; DCHECK_LT(slot_idx, numOfSlots[idx]); return reinterpret_cast<Slot*>(ptr); } size_t SlotIndex(Slot* slot) const { const uint8_t idx = size_bracket_idx_; const size_t bracket_size = bracketSizes[idx]; const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot) - reinterpret_cast<uint8_t*>(FirstSlot()); DCHECK_EQ(offset_from_slot_base % bracket_size, 0U); size_t slot_idx = offset_from_slot_base / bracket_size; DCHECK_LT(slot_idx, numOfSlots[idx]); return slot_idx; } // TODO: DISALLOW_COPY_AND_ASSIGN(Run); }; // The magic number for a run. static constexpr uint8_t kMagicNum = 42; // The magic number for free pages. static constexpr uint8_t kMagicNumFree = 43; // The number of size brackets. static constexpr size_t kNumOfSizeBrackets = 42; // The sizes (the slot sizes, in bytes) of the size brackets. static size_t bracketSizes[kNumOfSizeBrackets]; // The numbers of pages that are used for runs for each size bracket. static size_t numOfPages[kNumOfSizeBrackets]; // The numbers of slots of the runs for each size bracket. static size_t numOfSlots[kNumOfSizeBrackets]; // The header sizes in bytes of the runs for each size bracket. static size_t headerSizes[kNumOfSizeBrackets]; // Initialize the run specs (the above arrays). static void Initialize(); static bool initialized_; // Returns the byte size of the bracket size from the index. static size_t IndexToBracketSize(size_t idx) { DCHECK_LT(idx, kNumOfSizeBrackets); return bracketSizes[idx]; } // Returns the index of the size bracket from the bracket size. static size_t BracketSizeToIndex(size_t size) { DCHECK(8 <= size && ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) || (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) || size == 1 * KB || size == 2 * KB)); size_t idx; if (UNLIKELY(size == 1 * KB)) { idx = kNumOfSizeBrackets - 2; } else if (UNLIKELY(size == 2 * KB)) { idx = kNumOfSizeBrackets - 1; } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) { DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U); idx = size / kThreadLocalBracketQuantumSize - 1; } else { DCHECK(size <= kMaxRegularBracketSize); DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U); idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1) + kNumThreadLocalSizeBrackets; } DCHECK(bracketSizes[idx] == size); return idx; } // Returns true if the given allocation size is for a thread local allocation. static bool IsSizeForThreadLocal(size_t size) { bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize; DCHECK(size > kLargeSizeThreshold || (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets))); return is_size_for_thread_local; } // Rounds up the size up the nearest bracket size. static size_t RoundToBracketSize(size_t size) { DCHECK(size <= kLargeSizeThreshold); if (LIKELY(size <= kMaxThreadLocalBracketSize)) { return RoundUp(size, kThreadLocalBracketQuantumSize); } else if (size <= kMaxRegularBracketSize) { return RoundUp(size, kBracketQuantumSize); } else if (UNLIKELY(size <= 1 * KB)) { return 1 * KB; } else { DCHECK_LE(size, 2 * KB); return 2 * KB; } } // Returns the size bracket index from the byte size with rounding. static size_t SizeToIndex(size_t size) { DCHECK(size <= kLargeSizeThreshold); if (LIKELY(size <= kMaxThreadLocalBracketSize)) { return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1; } else if (size <= kMaxRegularBracketSize) { return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1 + kNumThreadLocalSizeBrackets; } else if (size <= 1 * KB) { return kNumOfSizeBrackets - 2; } else { DCHECK_LE(size, 2 * KB); return kNumOfSizeBrackets - 1; } } // A combination of SizeToIndex() and RoundToBracketSize(). static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) { DCHECK(size <= kLargeSizeThreshold); size_t idx; size_t bracket_size; if (LIKELY(size <= kMaxThreadLocalBracketSize)) { bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize); idx = bracket_size / kThreadLocalBracketQuantumSize - 1; } else if (size <= kMaxRegularBracketSize) { bracket_size = RoundUp(size, kBracketQuantumSize); idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1) + kNumThreadLocalSizeBrackets; } else if (size <= 1 * KB) { bracket_size = 1 * KB; idx = kNumOfSizeBrackets - 2; } else { DCHECK(size <= 2 * KB); bracket_size = 2 * KB; idx = kNumOfSizeBrackets - 1; } DCHECK_EQ(idx, SizeToIndex(size)) << idx; DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx; DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx; DCHECK_LE(size, bracket_size) << idx; DCHECK(size > kMaxRegularBracketSize || (size <= kMaxThreadLocalBracketSize && bracket_size - size < kThreadLocalBracketQuantumSize) || (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx; *bracket_size_out = bracket_size; return idx; } // Returns the page map index from an address. Requires that the // address is page size aligned. size_t ToPageMapIndex(const void* addr) const { DCHECK_LE(base_, addr); DCHECK_LT(addr, base_ + capacity_); size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_; DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0)); return byte_offset / kPageSize; } // Returns the page map index from an address with rounding. size_t RoundDownToPageMapIndex(const void* addr) const { DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_); return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize; } // A memory allocation request larger than this size is treated as a large object and allocated // at a page-granularity. static const size_t kLargeSizeThreshold = 2048; // If true, check that the returned memory is actually zero. static constexpr bool kCheckZeroMemory = kIsDebugBuild; // Valgrind protects memory, so do not check memory when running under valgrind. In a normal // build with kCheckZeroMemory the whole test should be optimized away. // TODO: Unprotect before checks. ALWAYS_INLINE bool ShouldCheckZeroMemory(); // If true, log verbose details of operations. static constexpr bool kTraceRosAlloc = false; struct hash_run { size_t operator()(const RosAlloc::Run* r) const { return reinterpret_cast<size_t>(r); } }; struct eq_run { bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const { return r1 == r2; } }; public: // Different page release modes. enum PageReleaseMode { kPageReleaseModeNone, // Release no empty pages. kPageReleaseModeEnd, // Release empty pages at the end of the space. kPageReleaseModeSize, // Release empty pages that are larger than the threshold. kPageReleaseModeSizeAndEnd, // Release empty pages that are larger than the threshold or // at the end of the space. kPageReleaseModeAll, // Release all empty pages. }; // The default value for page_release_size_threshold_. static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB; // We use thread-local runs for the size brackets whose indexes // are less than this index. We use shared (current) runs for the rest. // Sync this with the length of Thread::rosalloc_runs_. static const size_t kNumThreadLocalSizeBrackets = 16; static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread, "Mismatch between kNumThreadLocalSizeBrackets and " "kNumRosAllocThreadLocalSizeBracketsInThread"); // The size of the largest bracket we use thread-local runs for. // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1]. static const size_t kMaxThreadLocalBracketSize = 128; // We use regular (8 or 16-bytes increment) runs for the size brackets whose indexes are less than // this index. static const size_t kNumRegularSizeBrackets = 40; // The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are the // 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1]. static const size_t kMaxRegularBracketSize = 512; // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes). static constexpr size_t kThreadLocalBracketQuantumSize = 8; // Equal to Log2(kThreadLocalBracketQuantumSize). static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3; // The bracket size increment for the non-thread-local, regular brackets (of size <= // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes). static constexpr size_t kBracketQuantumSize = 16; // Equal to Log2(kBracketQuantumSize). static constexpr size_t kBracketQuantumSizeShift = 4; private: // The base address of the memory region that's managed by this allocator. uint8_t* base_; // The footprint in bytes of the currently allocated portion of the // memory region. size_t footprint_; // The maximum footprint. The address, base_ + capacity_, indicates // the end of the memory region that's currently managed by this allocator. size_t capacity_; // The maximum capacity. The address, base_ + max_capacity_, indicates // the end of the memory region that's ever managed by this allocator. size_t max_capacity_; // The run sets that hold the runs whose slots are not all // full. non_full_runs_[i] is guarded by size_bracket_locks_[i]. AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets]; // The run sets that hold the runs whose slots are all full. This is // debug only. full_runs_[i] is guarded by size_bracket_locks_[i]. std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>> full_runs_[kNumOfSizeBrackets]; // The set of free pages. AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_); // The dedicated full run, it is always full and shared by all threads when revoking happens. // This is an optimization since enables us to avoid a null check for revoked runs. static Run* dedicated_full_run_; // Using size_t to ensure that it is at least word aligned. static size_t dedicated_full_run_storage_[]; // The current runs where the allocations are first attempted for // the size brackes that do not use thread-local // runs. current_runs_[i] is guarded by size_bracket_locks_[i]. Run* current_runs_[kNumOfSizeBrackets]; // The mutexes, one per size bracket. Mutex* size_bracket_locks_[kNumOfSizeBrackets]; // Bracket lock names (since locks only have char* names). std::string size_bracket_lock_names_[kNumOfSizeBrackets]; // The types of page map entries. enum PageMapKind { kPageMapReleased = 0, // Zero and released back to the OS. kPageMapEmpty, // Zero but probably dirty. kPageMapRun, // The beginning of a run. kPageMapRunPart, // The non-beginning part of a run. kPageMapLargeObject, // The beginning of a large object. kPageMapLargeObjectPart, // The non-beginning part of a large object. }; // The table that indicates what pages are currently used for. volatile uint8_t* page_map_; // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree. size_t page_map_size_; size_t max_page_map_size_; std::unique_ptr<MemMap> page_map_mem_map_; // The table that indicates the size of free page runs. These sizes // are stored here to avoid storing in the free page header and // release backing pages. std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_ GUARDED_BY(lock_); // The global lock. Used to guard the page map, the free page set, // and the footprint. Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; // The reader-writer lock to allow one bulk free at a time while // allowing multiple individual frees at the same time. Also, this // is used to avoid race conditions between BulkFree() and // RevokeThreadLocalRuns() on the bulk free list. ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; // The page release mode. const PageReleaseMode page_release_mode_; // Under kPageReleaseModeSize(AndEnd), if the free page run size is // greater than or equal to this value, release pages. const size_t page_release_size_threshold_; // Whether this allocator is running under Valgrind. bool is_running_on_memory_tool_; // The base address of the memory region that's managed by this allocator. uint8_t* Begin() { return base_; } // The end address of the memory region that's managed by this allocator. uint8_t* End() { return base_ + capacity_; } // Page-granularity alloc/free void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type) REQUIRES(lock_); // Returns how many bytes were freed. size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_); // Allocate/free a run slot. void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, size_t* bytes_tl_bulk_allocated) REQUIRES(!lock_); // Allocate/free a run slot without acquiring locks. // TODO: REQUIRES(Locks::mutator_lock_) void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, size_t* bytes_tl_bulk_allocated) REQUIRES(!lock_); void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_); // Returns the bracket size. size_t FreeFromRun(Thread* self, void* ptr, Run* run) REQUIRES(!lock_); // Used to allocate a new thread local run for a size bracket. Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_); // Used to acquire a new/reused run for a size bracket. Used when a // thread-local or current run gets full. Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_); // The internal of non-bulk Free(). size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_); // Allocates large objects. void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, size_t* bytes_tl_bulk_allocated) REQUIRES(!lock_); // Revoke a run by adding it to non_full_runs_ or freeing the pages. void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_); // Revoke the current runs which share an index with the thread local runs. void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_); // Release a range of pages. size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_); // Dumps the page map for debugging. std::string DumpPageMap() REQUIRES(lock_); public: RosAlloc(void* base, size_t capacity, size_t max_capacity, PageReleaseMode page_release_mode, bool running_on_memory_tool, size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold); ~RosAlloc(); static size_t RunFreeListOffset() { return OFFSETOF_MEMBER(Run, free_list_); } static size_t RunFreeListHeadOffset() { return OFFSETOF_MEMBER(SlotFreeList<false>, head_); } static size_t RunFreeListSizeOffset() { return OFFSETOF_MEMBER(SlotFreeList<false>, size_); } static size_t RunSlotNextOffset() { return OFFSETOF_MEMBER(Slot, next_); } // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization. // If used, this may cause race conditions if multiple threads are allocating at the same time. template<bool kThreadSafe = true> void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, size_t* bytes_tl_bulk_allocated) REQUIRES(!lock_); size_t Free(Thread* self, void* ptr) REQUIRES(!bulk_free_lock_, !lock_); size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs) REQUIRES(!bulk_free_lock_, !lock_); // Returns true if the given allocation request can be allocated in // an existing thread local run without allocating a new run. ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size); // Allocate the given allocation request in an existing thread local // run without allocating a new run. ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated); // Returns the maximum bytes that could be allocated for the given // size in bulk, that is the maximum value for the // bytes_allocated_bulk out param returned by RosAlloc::Alloc(). ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size); // Returns the size of the allocated slot for a given allocated memory chunk. size_t UsableSize(const void* ptr) REQUIRES(!lock_); // Returns the size of the allocated slot for a given size. size_t UsableSize(size_t bytes) { if (UNLIKELY(bytes > kLargeSizeThreshold)) { return RoundUp(bytes, kPageSize); } else { return RoundToBracketSize(bytes); } } // Try to reduce the current footprint by releasing the free page // run at the end of the memory region, if any. bool Trim() REQUIRES(!lock_); // Iterates over all the memory slots and apply the given function. void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg) REQUIRES(!lock_); // Release empty pages. size_t ReleasePages() REQUIRES(!lock_); // Returns the current footprint. size_t Footprint() REQUIRES(!lock_); // Returns the current capacity, maximum footprint. size_t FootprintLimit() REQUIRES(!lock_); // Update the current capacity. void SetFootprintLimit(size_t bytes) REQUIRES(!lock_); // Releases the thread-local runs assigned to the given thread back to the common set of runs. // Returns the total bytes of free slots in the revoked thread local runs. This is to be // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting. size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_); // Releases the thread-local runs assigned to all the threads back to the common set of runs. // Returns the total bytes of free slots in the revoked thread local runs. This is to be // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting. size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_); // Assert the thread local runs of a thread are revoked. void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_); // Assert all the thread local runs are revoked. void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_); static Run* GetDedicatedFullRun() { return dedicated_full_run_; } bool IsFreePage(size_t idx) const { DCHECK_LT(idx, capacity_ / kPageSize); uint8_t pm_type = page_map_[idx]; return pm_type == kPageMapReleased || pm_type == kPageMapEmpty; } // Callbacks for InspectAll that will count the number of bytes // allocated and objects allocated, respectively. static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg); static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg); bool DoesReleaseAllPages() const { return page_release_mode_ == kPageReleaseModeAll; } // Verify for debugging. void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_, !lock_); void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) REQUIRES(!bulk_free_lock_, !lock_); void DumpStats(std::ostream& os) REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_); private: friend std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs); DISALLOW_COPY_AND_ASSIGN(RosAlloc); }; std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs); // Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere // else (currently rosalloc_space.cc). void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment); } // namespace allocator } // namespace gc } // namespace art #endif // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_