/*
* Copyright 2018 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "GrVkAMDMemoryAllocator.h"
#include "GrVkInterface.h"
#include "GrVkMemory.h"
#include "GrVkUtil.h"
GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
VkDevice device,
sk_sp<const GrVkInterface> interface)
: fAllocator(VK_NULL_HANDLE)
, fInterface(std::move(interface))
, fDevice(device) {
#define GR_COPY_FUNCTION(NAME) functions.vk##NAME = fInterface->fFunctions.f##NAME
VmaVulkanFunctions functions;
GR_COPY_FUNCTION(GetPhysicalDeviceProperties);
GR_COPY_FUNCTION(GetPhysicalDeviceMemoryProperties);
GR_COPY_FUNCTION(AllocateMemory);
GR_COPY_FUNCTION(FreeMemory);
GR_COPY_FUNCTION(MapMemory);
GR_COPY_FUNCTION(UnmapMemory);
GR_COPY_FUNCTION(BindBufferMemory);
GR_COPY_FUNCTION(BindImageMemory);
GR_COPY_FUNCTION(GetBufferMemoryRequirements);
GR_COPY_FUNCTION(GetImageMemoryRequirements);
GR_COPY_FUNCTION(CreateBuffer);
GR_COPY_FUNCTION(DestroyBuffer);
GR_COPY_FUNCTION(CreateImage);
GR_COPY_FUNCTION(DestroyImage);
// Skia current doesn't support VK_KHR_dedicated_allocation
functions.vkGetBufferMemoryRequirements2KHR = nullptr;
functions.vkGetImageMemoryRequirements2KHR = nullptr;
VmaAllocatorCreateInfo info;
info.flags = 0;
info.physicalDevice = physicalDevice;
info.device = device;
// Manually testing runs of dm using 64 here instead of the default 256 shows less memory usage
// on average. Also dm seems to run faster using 64 so it doesn't seem to be trading off speed
// for memory.
info.preferredLargeHeapBlockSize = 64*1024*1024;
info.pAllocationCallbacks = nullptr;
info.pDeviceMemoryCallbacks = nullptr;
info.frameInUseCount = 0;
info.pHeapSizeLimit = nullptr;
info.pVulkanFunctions = &functions;
vmaCreateAllocator(&info, &fAllocator);
}
GrVkAMDMemoryAllocator::~GrVkAMDMemoryAllocator() {
vmaDestroyAllocator(fAllocator);
fAllocator = VK_NULL_HANDLE;
}
bool GrVkAMDMemoryAllocator::allocateMemoryForImage(VkImage image, AllocationPropertyFlags flags,
GrVkBackendMemory* backendMemory) {
VmaAllocationCreateInfo info;
info.flags = 0;
info.usage = VMA_MEMORY_USAGE_UNKNOWN;
info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
info.preferredFlags = 0;
info.memoryTypeBits = 0;
info.pool = VK_NULL_HANDLE;
info.pUserData = nullptr;
if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
}
if (AllocationPropertyFlags::kLazyAllocation & flags) {
info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
}
VmaAllocation allocation;
VkResult result = vmaAllocateMemoryForImage(fAllocator, image, &info, &allocation, nullptr);
if (VK_SUCCESS != result) {
return false;
}
*backendMemory = (GrVkBackendMemory)allocation;
return true;
}
bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsage usage,
AllocationPropertyFlags flags,
GrVkBackendMemory* backendMemory) {
VmaAllocationCreateInfo info;
info.flags = 0;
info.usage = VMA_MEMORY_USAGE_UNKNOWN;
info.memoryTypeBits = 0;
info.pool = VK_NULL_HANDLE;
info.pUserData = nullptr;
switch (usage) {
case BufferUsage::kGpuOnly:
info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
info.preferredFlags = 0;
break;
case BufferUsage::kCpuOnly:
info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
break;
case BufferUsage::kCpuWritesGpuReads:
// First attempt to try memory is also cached
info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
break;
case BufferUsage::kGpuWritesCpuReads:
info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
info.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
break;
}
if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
}
if ((AllocationPropertyFlags::kLazyAllocation & flags) && BufferUsage::kGpuOnly == usage) {
info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
}
if (AllocationPropertyFlags::kPersistentlyMapped & flags) {
SkASSERT(BufferUsage::kGpuOnly != usage);
info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
}
VmaAllocation allocation;
VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
if (VK_SUCCESS != result) {
if (usage == BufferUsage::kCpuWritesGpuReads) {
// We try again but this time drop the requirement for cached
info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
}
}
if (VK_SUCCESS != result) {
return false;
}
*backendMemory = (GrVkBackendMemory)allocation;
return true;
}
void GrVkAMDMemoryAllocator::freeMemory(const GrVkBackendMemory& memoryHandle) {
const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
vmaFreeMemory(fAllocator, allocation);
}
void GrVkAMDMemoryAllocator::getAllocInfo(const GrVkBackendMemory& memoryHandle,
GrVkAlloc* alloc) const {
const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
VmaAllocationInfo vmaInfo;
vmaGetAllocationInfo(fAllocator, allocation, &vmaInfo);
VkMemoryPropertyFlags memFlags;
vmaGetMemoryTypeProperties(fAllocator, vmaInfo.memoryType, &memFlags);
uint32_t flags = 0;
if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & memFlags) {
flags |= GrVkAlloc::kMappable_Flag;
}
if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
flags |= GrVkAlloc::kNoncoherent_Flag;
}
alloc->fMemory = vmaInfo.deviceMemory;
alloc->fOffset = vmaInfo.offset;
alloc->fSize = vmaInfo.size;
alloc->fFlags = flags;
alloc->fBackendMemory = memoryHandle;
// TODO: Remove this hack once the AMD allocator is able to handle the alignment of noncoherent
// memory itself.
if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
// This is a hack to say that the allocation size is actually larger than it is. This is to
// make sure when we are flushing and invalidating noncoherent memory we have a size that is
// aligned to the nonCoherentAtomSize. This is safe for three reasons. First the total size
// of the VkDeviceMemory we allocate will always be a multple of the max possible alignment
// (currently 256). Second all sub allocations are alignmed with an offset of 256. And
// finally the allocator we are using always maps the entire VkDeviceMemory so the range
// we'll be flushing/invalidating will be mapped. So our new fake allocation size will
// always fit into the VkDeviceMemory, will never push it into another suballocation, and
// will always be mapped when map is called.
const VkPhysicalDeviceProperties* devProps;
vmaGetPhysicalDeviceProperties(fAllocator, &devProps);
VkDeviceSize alignment = devProps->limits.nonCoherentAtomSize;
alloc->fSize = (alloc->fSize + alignment - 1) & ~(alignment -1);
}
}
void* GrVkAMDMemoryAllocator::mapMemory(const GrVkBackendMemory& memoryHandle) {
const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
void* mapPtr;
vmaMapMemory(fAllocator, allocation, &mapPtr);
return mapPtr;
}
void GrVkAMDMemoryAllocator::unmapMemory(const GrVkBackendMemory& memoryHandle) {
const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
vmaUnmapMemory(fAllocator, allocation);
}
void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHandle,
VkDeviceSize offset, VkDeviceSize size) {
GrVkAlloc info;
this->getAllocInfo(memoryHandle, &info);
if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
// We need to store the nonCoherentAtomSize for non-coherent flush/invalidate alignment.
const VkPhysicalDeviceProperties* physDevProps;
vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;
VkMappedMemoryRange mappedMemoryRange;
GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
&mappedMemoryRange);
GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
}
}
void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& memoryHandle,
VkDeviceSize offset, VkDeviceSize size) {
GrVkAlloc info;
this->getAllocInfo(memoryHandle, &info);
if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
// We need to store the nonCoherentAtomSize for non-coherent flush/invalidate alignment.
const VkPhysicalDeviceProperties* physDevProps;
vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;
VkMappedMemoryRange mappedMemoryRange;
GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
&mappedMemoryRange);
GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
}
}
uint64_t GrVkAMDMemoryAllocator::totalUsedMemory() const {
VmaStats stats;
vmaCalculateStats(fAllocator, &stats);
return stats.total.usedBytes;
}
uint64_t GrVkAMDMemoryAllocator::totalAllocatedMemory() const {
VmaStats stats;
vmaCalculateStats(fAllocator, &stats);
return stats.total.usedBytes + stats.total.unusedBytes;
}