5#include "Containers/Array.h"
7#include "HAL/PlatformAtomics.h"
8#include "HAL/PlatformMemory.h"
9#include "Math/UnrealMathUtility.h"
10#include "Templates/Atomic.h"
11#include "Templates/MemoryOps.h"
14#include "HAL/Allocators/CachedOSPageAllocator.h"
15#include "HAL/Allocators/PooledVirtualMemoryAllocator.h"
16#include "HAL/CriticalSection.h"
17#include "HAL/LowLevelMemTracker.h"
18#include "HAL/MallocBinnedCommon.h"
19#include "HAL/MemoryBase.h"
20#include "HAL/PlatformMath.h"
21#include "HAL/PlatformTLS.h"
22#include "HAL/UnrealMemory.h"
23#include "Math/NumericLimits.h"
24#include "Misc/AssertionMacros.h"
25#include "Misc/ScopeLock.h"
26#include "Misc/ScopeLock.h"
27#include "Templates/AlignmentTemplates.h"
#define BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle (8)
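
// BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle above caps the number of
// lock-free bundle slots the global recycler keeps per pool (see FGlobalRecycler
// below); bundles that cannot be parked in a slot are returned to the caller for freeing.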
#define COLLECT_BINNEDGPU_STATS (!UE_BUILD_SHIPPING)

#if COLLECT_BINNEDGPU_STATS
	#define MBG_STAT(x) x
#else
	#define MBG_STAT(x)
#endif
PRAGMA_DISABLE_UNSAFE_TYPECAST_WARNINGS
class FMallocBinnedGPU final : public FMalloc
{
	struct FGlobalRecycler;
	struct FPoolInfoLarge;
	struct FPoolInfoSmall;
	struct PoolHashBucket;
	/** CPU-side proxy for a block of GPU memory; the CPU must never write through GPUMemory. */
	struct FGPUMemoryBlockProxy
	{
		uint8 MemoryModifiedByCPU[32 - sizeof(void*)];	// may be modified by the CPU (free-list links, etc.)
		void* GPUMemory;								// the actual GPU allocation this proxy stands in for

		FGPUMemoryBlockProxy(void* InGPUMemory)
			: GPUMemory(InGPUMemory)
		{
			check(GPUMemory);
		}
	};
	struct FFreeBlock
	{
		enum
		{
			CANARY_VALUE = 0xc3
		};

		FORCEINLINE FFreeBlock(uint32 InPageSize, uint32 InBlockSize, uint32 InPoolIndex, uint8 MinimumAlignmentShift)
			: BlockSizeShifted(InBlockSize >> MinimumAlignmentShift)
			, PoolIndex(InPoolIndex)
			, Canary(CANARY_VALUE)
			, NextFreeBlock(nullptr)
		{
			NumFreeBlocks = InPageSize / InBlockSize;
		}

		FORCEINLINE bool IsCanaryOk() const
		{
			return Canary == FFreeBlock::CANARY_VALUE;
		}
		void CanaryFail() const;

		FORCEINLINE void* AllocateRegularBlock(uint8 MinimumAlignmentShift)
		{
			--NumFreeBlocks;
			return (uint8*)(((FGPUMemoryBlockProxy*)this)->GPUMemory) + NumFreeBlocks * (uint32(BlockSizeShifted) << MinimumAlignmentShift);
		}
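
		// Illustrative worked example (values assumed for the sketch, not taken
		// from the engine): with MinimumAlignmentShift == 4 and InBlockSize == 48,
		// BlockSizeShifted == 3; if --NumFreeBlocks leaves 10, the returned block
		// is GPUMemory + 10 * (3 << 4) == GPUMemory + 480, i.e. blocks are handed
		// out from the back of the span toward the front.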
		uint16 BlockSizeShifted;	// size of the blocks this list points to, >> MinimumAlignmentShift
		uint8 PoolIndex;			// index of the pool this block belongs to
		uint8 Canary;				// holds CANARY_VALUE while the block header is valid
		uint32 NumFreeBlocks;		// number of consecutive free blocks here, at least 1
		FFreeBlock* NextFreeBlock;	// next free block, or nullptr
	};
	struct FPoolTable
	{
		uint16 BlocksPerBlockOfBlocks;
		uint8 PagesPlatformForBlockOfBlocks;

		FBitTree BlockOfBlockAllocationBits;	// one bit per block of blocks: set if it has at least one free block
		FBitTree BlockOfBlockIsExhausted;		// one bit per block of blocks: set if it has no free blocks

		uint32 NumEverUsedBlockOfBlocks;
		FPoolInfoSmall** PoolInfos;

		uint64 UnusedAreaOffsetLow;
	};
	struct FPtrToPoolMapping
	{
		FPtrToPoolMapping()
			: PtrToPoolPageBitShift(0)
			, HashKeyShift(0)
			, PoolMask(0)
			, MaxHashBuckets(0)
		{
		}

		explicit FPtrToPoolMapping(uint32 InPageSize, uint64 InNumPoolsPerPage, uint64 AddressLimit)
		{
			Init(InPageSize, InNumPoolsPerPage, AddressLimit);
		}
		void Init(uint32 InPageSize, uint64 InNumPoolsPerPage, uint64 AddressLimit)
		{
			uint64 PoolPageToPoolBitShift = FPlatformMath::CeilLogTwo(InNumPoolsPerPage);

			PtrToPoolPageBitShift = FPlatformMath::CeilLogTwo(InPageSize);
			HashKeyShift = PtrToPoolPageBitShift + PoolPageToPoolBitShift;
			PoolMask = (1ull << PoolPageToPoolBitShift) - 1;
			MaxHashBuckets = AddressLimit >> HashKeyShift;
		}
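
		// Illustrative worked example (assumed inputs): InPageSize == 65536 and
		// InNumPoolsPerPage == 4 give PtrToPoolPageBitShift == 16,
		// PoolPageToPoolBitShift == 2, HashKeyShift == 18 and PoolMask == 0x3;
		// with AddressLimit == (1ull << 48), MaxHashBuckets == (1ull << 30).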
		FORCEINLINE void GetHashBucketAndPoolIndices(const void* InPtr, uint32& OutBucketIndex, UPTRINT& OutBucketCollision, uint32& OutPoolIndex) const
		{
			OutBucketCollision = (UPTRINT)InPtr >> HashKeyShift;
			OutBucketIndex = uint32(OutBucketCollision & (MaxHashBuckets - 1));
			OutPoolIndex = ((UPTRINT)InPtr >> PtrToPoolPageBitShift) & PoolMask;
		}
		FORCEINLINE uint64 GetMaxHashBuckets() const
		{
			return MaxHashBuckets;
		}

	private:
		/** Shift to apply to a pointer to get the pool page index. */
		uint64 PtrToPoolPageBitShift;
		/** Shift required to build the hash-table key. */
		uint64 HashKeyShift;
		/** Masks off the bits used to select the pool within a page. */
		uint64 PoolMask;
		/** Upper bound on hash buckets, derived from the address limit. */
		uint64 MaxHashBuckets;
	};
	struct FBundleNode
	{
		FBundleNode* NextNodeInCurrentBundle;
		union
		{
			FBundleNode* NextBundle;
			int32 Count;
		};
	};
	struct FBundle
	{
		FORCEINLINE FBundle()
		{
			Reset();
		}

		FORCEINLINE void Reset()
		{
			Head = nullptr;
			Count = 0;
		}

		FORCEINLINE void PushHead(FBundleNode* Node)
		{
			Node->NextNodeInCurrentBundle = Head;
			Node->NextBundle = nullptr;
			Head = Node;
			Count++;
		}

		FORCEINLINE FBundleNode* PopHead()
		{
			FBundleNode* Result = Head;
			Count--;
			Head = Head->NextNodeInCurrentBundle;
			return Result;
		}

		FBundleNode* Head;
		uint32 Count;
	};
	struct FFreeBlockList
	{
		// return true if the pointer was actually pushed
		FORCEINLINE bool PushToFront(FMallocBinnedGPU& Allocator, void* InPtr, uint32 InPoolIndex, uint32 InBlockSize, const FArenaParams& LocalArenaParams)
		{
			checkSlow(InPtr);

			if ((PartialBundle.Count >= (uint32)LocalArenaParams.MaxBlocksPerBundle) | (PartialBundle.Count * InBlockSize >= (uint32)LocalArenaParams.MaxSizePerBundle))
			{
				if (FullBundle.Head)
				{
					return false;
				}
				FullBundle = PartialBundle;
				PartialBundle.Reset();
			}
			PartialBundle.PushHead((FBundleNode*)new FGPUMemoryBlockProxy(InPtr));
			MBG_STAT(Allocator.GPUProxyMemory += sizeof(FGPUMemoryBlockProxy);)
			return true;
		}
		FORCEINLINE bool CanPushToFront(uint32 InPoolIndex, uint32 InBlockSize, const FArenaParams& LocalArenaParams)
		{
			return !((!!FullBundle.Head) & ((PartialBundle.Count >= (uint32)LocalArenaParams.MaxBlocksPerBundle) | (PartialBundle.Count * InBlockSize >= (uint32)LocalArenaParams.MaxSizePerBundle)));
		}
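
		// The bitwise & and | on boolean operands here (and throughout this class)
		// are deliberate: they evaluate both sides without branching, trading
		// short-circuit evaluation for fewer branch mispredictions.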
		FORCEINLINE void* PopFromFront(FMallocBinnedGPU& Allocator, uint32 InPoolIndex)
		{
			if ((!PartialBundle.Head) & (!!FullBundle.Head))
			{
				PartialBundle = FullBundle;
				FullBundle.Reset();
			}
			void* Result = nullptr;
			if (PartialBundle.Head)
			{
				FGPUMemoryBlockProxy* Proxy = (FGPUMemoryBlockProxy*)PartialBundle.PopHead();
				Result = Proxy->GPUMemory;
				check(Result);
				delete Proxy;
				MBG_STAT(Allocator.GPUProxyMemory -= sizeof(FGPUMemoryBlockProxy);)
			}
			return Result;
		}
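
		// Note that the free list stores CPU-side proxies, never the GPU blocks
		// themselves: links and counts live in FGPUMemoryBlockProxy's CPU memory,
		// so a proxy is deleted exactly when its GPU pointer leaves the cache.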
		// tries to recycle the full bundle; if that fails, it is returned to the caller for freeing
		FBundleNode* RecyleFull(FArenaParams& LocalArenaParams, FGlobalRecycler& GGlobalRecycler, uint32 InPoolIndex);
		bool ObtainPartial(FArenaParams& LocalArenaParams, FGlobalRecycler& GGlobalRecycler, uint32 InPoolIndex);
		FBundleNode* PopBundles(uint32 InPoolIndex);

	private:
		FBundle PartialBundle;
		FBundle FullBundle;
	};
	struct FPerThreadFreeBlockLists
	{
		FORCEINLINE static FPerThreadFreeBlockLists* Get(uint32 BinnedGPUTlsSlot)
		{
			return BinnedGPUTlsSlot ? (FPerThreadFreeBlockLists*)FPlatformTLS::GetTlsValue(BinnedGPUTlsSlot) : nullptr;
		}
		static void SetTLS(FMallocBinnedGPU& Allocator);
		static int64 ClearTLS(FMallocBinnedGPU& Allocator);

		FPerThreadFreeBlockLists(uint32 PoolCount)
			: AllocatedMemory(0)
		{
			FreeLists.AddDefaulted(PoolCount);
		}
		FORCEINLINE void* Malloc(FMallocBinnedGPU& Allocator, uint32 InPoolIndex)
		{
			return FreeLists[InPoolIndex].PopFromFront(Allocator, InPoolIndex);
		}
		// return true if the pointer was pushed to the thread-local cache
		FORCEINLINE bool Free(FMallocBinnedGPU& Allocator, void* InPtr, uint32 InPoolIndex, uint32 InBlockSize, const FArenaParams& LocalArenaParams)
		{
			return FreeLists[InPoolIndex].PushToFront(Allocator, InPtr, InPoolIndex, InBlockSize, LocalArenaParams);
		}
		// return true if a pointer can be pushed to the thread-local cache
		FORCEINLINE bool CanFree(uint32 InPoolIndex, uint32 InBlockSize, const FArenaParams& LocalArenaParams)
		{
			return FreeLists[InPoolIndex].CanPushToFront(InPoolIndex, InBlockSize, LocalArenaParams);
		}
		// returns a bundle that the caller must free if it could not be recycled
		FBundleNode* RecycleFullBundle(FArenaParams& LocalArenaParams, FGlobalRecycler& GlobalRecycler, uint32 InPoolIndex)
		{
			return FreeLists[InPoolIndex].RecyleFull(LocalArenaParams, GlobalRecycler, InPoolIndex);
		}
		// returns true if we have anything to pop
		bool ObtainRecycledPartial(FArenaParams& LocalArenaParams, FGlobalRecycler& GlobalRecycler, uint32 InPoolIndex)
		{
			return FreeLists[InPoolIndex].ObtainPartial(LocalArenaParams, GlobalRecycler, InPoolIndex);
		}
		FBundleNode* PopBundles(uint32 InPoolIndex)
		{
			return FreeLists[InPoolIndex].PopBundles(InPoolIndex);
		}
		int64 AllocatedMemory;
		TArray<FFreeBlockList> FreeLists;
	};
	struct FGlobalRecycler
	{
		void Init(uint32 PoolCount)
		{
			Bundles.AddDefaulted(PoolCount);
		}
		bool PushBundle(uint32 NumCachedBundles, uint32 InPoolIndex, FBundleNode* InBundle)
		{
			for (uint32 Slot = 0; Slot < NumCachedBundles && Slot < BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle; Slot++)
			{
				if (!Bundles[InPoolIndex].FreeBundles[Slot])
				{
					if (!FPlatformAtomics::InterlockedCompareExchangePointer((void**)&Bundles[InPoolIndex].FreeBundles[Slot], InBundle, nullptr))
					{
						return true;
					}
				}
			}
			return false;
		}
		FBundleNode* PopBundle(uint32 NumCachedBundles, uint32 InPoolIndex)
		{
			for (uint32 Slot = 0; Slot < NumCachedBundles && Slot < BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle; Slot++)
			{
				FBundleNode* Result = Bundles[InPoolIndex].FreeBundles[Slot];
				if (Result)
				{
					if (FPlatformAtomics::InterlockedCompareExchangePointer((void**)&Bundles[InPoolIndex].FreeBundles[Slot], nullptr, Result) == Result)
					{
						return Result;
					}
				}
			}
			return nullptr;
		}
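
		// Sketch of the contract implied by the compare-exchange calls above:
		// PushBundle publishes a bundle into the first empty slot it wins via CAS
		// and reports failure if every slot is taken, while PopBundle claims a
		// non-empty slot by swapping it back to nullptr, so both sides stay
		// correct under concurrent callers without taking a lock.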
	private:
		struct FPaddedBundlePointer
		{
			FBundleNode* FreeBundles[BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle];

			FPaddedBundlePointer()
			{
				DefaultConstructItems<FBundleNode*>(FreeBundles, BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle);
			}
		};

		TArray<FPaddedBundlePointer> Bundles;
	};
	FORCEINLINE uint64 PoolIndexFromPtr(const void* Ptr)
	{
		if (PoolSearchDiv == 0)
		{
			return (UPTRINT(Ptr) - UPTRINT(PoolBaseVMPtr[0])) >> ArenaParams.MaxMemoryPerBlockSizeShift;
		}
		uint64 PoolIndex = ArenaParams.PoolCount;
		if (((uint8*)Ptr >= PoolBaseVMPtr[0]) & ((uint8*)Ptr < HighestPoolBaseVMPtr + ArenaParams.MaxMemoryPerBlockSize))
		{
			PoolIndex = uint64((uint8*)Ptr - PoolBaseVMPtr[0]) / PoolSearchDiv;
			if (PoolIndex >= ArenaParams.PoolCount)
			{
				PoolIndex = ArenaParams.PoolCount - 1;
			}
			if ((uint8*)Ptr < PoolBaseVMPtr[(int32)PoolIndex])
			{
				do
				{
					PoolIndex--;
					check(PoolIndex < ArenaParams.PoolCount);
				} while ((uint8*)Ptr < PoolBaseVMPtr[(int32)PoolIndex]);
				if ((uint8*)Ptr >= PoolBaseVMPtr[(int32)PoolIndex] + ArenaParams.MaxMemoryPerBlockSize)
				{
					PoolIndex = ArenaParams.PoolCount; // the pointer was in a gap between pools
				}
			}
			else if ((uint8*)Ptr >= PoolBaseVMPtr[(int32)PoolIndex] + ArenaParams.MaxMemoryPerBlockSize)
			{
				do
				{
					PoolIndex++;
					check(PoolIndex < ArenaParams.PoolCount);
				} while ((uint8*)Ptr >= PoolBaseVMPtr[(int32)PoolIndex] + ArenaParams.MaxMemoryPerBlockSize);
				if ((uint8*)Ptr < PoolBaseVMPtr[(int32)PoolIndex])
				{
					PoolIndex = ArenaParams.PoolCount; // the pointer was in a gap between pools
				}
			}
		}
		return PoolIndex;
	}
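
	// Sketch of the lookup above (pool layout assumed for illustration): the
	// divide by PoolSearchDiv lands on or near the right pool despite unevenly
	// spaced pool bases, the do/while loops walk a few entries to correct the
	// estimate, and a pointer that falls in a gap between pools yields
	// PoolIndex == ArenaParams.PoolCount, which callers treat as "not ours".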
	FORCEINLINE uint8* PoolBasePtr(uint32 InPoolIndex)
	{
		return PoolBaseVMPtr[InPoolIndex];
	}
	FORCEINLINE uint64 PoolIndexFromPtrChecked(const void* Ptr)
	{
		uint64 Result = PoolIndexFromPtr(Ptr);
		check(Result < ArenaParams.PoolCount);
		return Result;
	}

	FORCEINLINE bool IsOSAllocation(const void* Ptr)
	{
		return PoolIndexFromPtr(Ptr) >= ArenaParams.PoolCount;
	}
	FORCEINLINE void* BlockOfBlocksPointerFromContainedPtr(const void* Ptr, uint8 PagesPlatformForBlockOfBlocks, uint32& OutBlockOfBlocksIndex)
	{
		uint32 PoolIndex = PoolIndexFromPtrChecked(Ptr);
		uint8* PoolStart = PoolBasePtr(PoolIndex);
		uint64 BlockOfBlocksIndex = (UPTRINT(Ptr) - UPTRINT(PoolStart)) / (UPTRINT(PagesPlatformForBlockOfBlocks) * UPTRINT(ArenaParams.AllocationGranularity));
		OutBlockOfBlocksIndex = BlockOfBlocksIndex;

		uint8* Result = PoolStart + BlockOfBlocksIndex * UPTRINT(PagesPlatformForBlockOfBlocks) * UPTRINT(ArenaParams.AllocationGranularity);

		check(Result < PoolStart + ArenaParams.MaxMemoryPerBlockSize);
		return Result;
	}
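
	// Illustrative worked example (assumed values): with AllocationGranularity ==
	// 65536 and PagesPlatformForBlockOfBlocks == 4, each block of blocks spans
	// 256 KiB, so a pointer 1 MiB into its pool yields OutBlockOfBlocksIndex == 4
	// and a Result of PoolStart + 4 * 256 KiB.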
	FORCEINLINE uint8* BlockPointerFromIndecies(uint32 InPoolIndex, uint32 BlockOfBlocksIndex, uint32 BlockOfBlocksSize)
	{
		uint8* PoolStart = PoolBasePtr(InPoolIndex);
		uint8* Ptr = PoolStart + BlockOfBlocksIndex * uint64(BlockOfBlocksSize);
		check(Ptr + BlockOfBlocksSize <= PoolStart + ArenaParams.MaxMemoryPerBlockSize);
		return Ptr;
	}

	FPoolInfoSmall* PushNewPoolToFront(FMallocBinnedGPU& Allocator, uint32 InBlockSize, uint32 InPoolIndex, uint32& OutBlockOfBlocksIndex);
	FPoolInfoSmall* GetFrontPool(FPoolTable& Table, uint32 InPoolIndex, uint32& OutBlockOfBlocksIndex);
	FORCEINLINE bool AdjustSmallBlockSizeForAlignment(SIZE_T& InOutSize, uint32 Alignment)
	{
		if ((InOutSize <= ArenaParams.MaxPoolSize) & (Alignment <= ArenaParams.MinimumAlignment))
		{
			return true;
		}
		SIZE_T AlignedSize = Align(InOutSize, Alignment);
		if (ArenaParams.bAttemptToAlignSmallBocks & (AlignedSize <= ArenaParams.MaxPoolSize) & (Alignment <= ArenaParams.MaximumAlignmentForSmallBlock))
		{
			// We may be able to satisfy this alignment from a small pool: walk up the
			// pool sizes until we find a block size that is itself a multiple of the alignment.
			uint32 PoolIndex = BoundSizeToPoolIndex(AlignedSize);
			do
			{
				uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
				if (IsAligned(BlockSize, Alignment))
				{
					InOutSize = SIZE_T(BlockSize);
					return true;
				}
				PoolIndex++;
				check(PoolIndex < ArenaParams.PoolCount);
			} while (true);
		}
		return false;
	}
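
	// Illustrative example (assumed pool sizes): a 40-byte request with
	// Alignment == 32 becomes AlignedSize == 64; a candidate 48-byte pool is
	// skipped because 48 is not 32-aligned, and the search settles on a 64-byte
	// pool, so the allocation ends up aligned purely by its block size.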
public:

	FArenaParams& GetParams()
	{
		return ArenaParams;
	}
	void InitMallocBinned();

	FMallocBinnedGPU();
	virtual ~FMallocBinnedGPU();
	// FMalloc interface.
	virtual bool IsInternallyThreadSafe() const override;
	FORCEINLINE virtual void* Malloc(SIZE_T Size, uint32 Alignment) override
	{
		Alignment = FMath::Max<uint32>(Alignment, ArenaParams.MinimumAlignment);

		void* Result = nullptr;

		// Only allocate from the small pools if the size is small enough and the
		// alignment is not unusually large.
		if (AdjustSmallBlockSizeForAlignment(Size, Alignment))
		{
			FPerThreadFreeBlockLists* Lists = ArenaParams.bPerThreadCaches ? FPerThreadFreeBlockLists::Get(BinnedGPUTlsSlot) : nullptr;
			if (Lists)
			{
				uint32 PoolIndex = BoundSizeToPoolIndex(Size);
				uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
				Result = Lists->Malloc(*this, PoolIndex);
				if (Result)
				{
					Lists->AllocatedMemory += BlockSize;
				}
			}
		}
		if (Result == nullptr)
		{
			Result = MallocExternal(Size, Alignment);
		}

		return Result;
	}
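
	// Usage sketch (GMallocGPU is a hypothetical allocator instance, not a name
	// from this header): GMallocGPU->Malloc(256, 16) services the request from the
	// calling thread's TLS cache when possible and only falls back to
	// MallocExternal, which may take the lock, when the cache comes up empty.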
	FORCEINLINE virtual void* Realloc(void* Ptr, SIZE_T NewSize, uint32 Alignment) override
	{
		check(!"MallocBinnedGPU cannot realloc memory because the memory is assumed to not be writable by the CPU");
		return nullptr;
	}
	FORCEINLINE virtual void Free(void* Ptr) override
	{
		uint64 PoolIndex = PoolIndexFromPtr(Ptr);
		if (PoolIndex < ArenaParams.PoolCount)
		{
			FPerThreadFreeBlockLists* Lists = ArenaParams.bPerThreadCaches ? FPerThreadFreeBlockLists::Get(BinnedGPUTlsSlot) : nullptr;
			if (Lists)
			{
				int32 BlockSize = PoolIndexToBlockSize(PoolIndex);
				if (Lists->Free(*this, Ptr, PoolIndex, BlockSize, ArenaParams))
				{
					Lists->AllocatedMemory -= BlockSize;
					return;
				}
			}
		}
		FreeExternal(Ptr);
	}
	FORCEINLINE virtual bool GetAllocationSize(void* Ptr, SIZE_T& SizeOut) override
	{
		uint64 PoolIndex = PoolIndexFromPtr(Ptr);
		if (PoolIndex < ArenaParams.PoolCount)
		{
			SizeOut = PoolIndexToBlockSize(PoolIndex);
			return true;
		}
		return GetAllocationSizeExternal(Ptr, SizeOut);
	}
	FORCEINLINE virtual SIZE_T QuantizeSize(SIZE_T Count, uint32 Alignment) override
	{
		check(DEFAULT_ALIGNMENT <= ArenaParams.MinimumAlignment); // used below
		checkSlow((Alignment & (Alignment - 1)) == 0); // the alignment must be a power of two
		SIZE_T SizeOut;
		if ((Count <= ArenaParams.MaxPoolSize) & (Alignment <= ArenaParams.MinimumAlignment)) // one branch, not two
		{
			SizeOut = PoolIndexToBlockSize(BoundSizeToPoolIndex(Count));
		}
		else
		{
			Alignment = FPlatformMath::Max<uint32>(Alignment, ArenaParams.AllocationGranularity);
			SizeOut = Align(Count, Alignment);
		}
		check(SizeOut >= Count);
		return SizeOut;
	}
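
	// Illustrative worked example (pool layout assumed): QuantizeSize(100, 8)
	// with a 112-byte small pool returns 112, the size the request would really
	// consume; a request larger than MaxPoolSize instead rounds up to the
	// allocation granularity, matching the external allocation path.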
	virtual bool ValidateHeap() override;
	virtual void Trim(bool bTrimThreadCaches) override;
	virtual void SetupTLSCachesOnCurrentThread() override;
	virtual void ClearAndDisableTLSCachesOnCurrentThread() override;
	virtual const TCHAR* GetDescriptiveName() override;
	// End FMalloc interface.

	void FlushCurrentThreadCache();
	void* MallocExternal(SIZE_T Size, uint32 Alignment);
	void FreeExternal(void* Ptr);
	bool GetAllocationSizeExternal(void* Ptr, SIZE_T& SizeOut);

	MBG_STAT(int64 GetTotalAllocatedSmallPoolMemory();)
	virtual void GetAllocatorStats(FGenericMemoryStats& out_Stats) override;
	/** Dumps current allocator stats to the log. */
	virtual void DumpAllocatorStats(class FOutputDevice& Ar) override;
	FORCEINLINE uint32 BoundSizeToPoolIndex(SIZE_T Size)
	{
		auto Index = ((Size + ArenaParams.MinimumAlignment - 1) >> ArenaParams.MinimumAlignmentShift);
		checkSlow(Index >= 0 && Index <= (ArenaParams.MaxPoolSize >> ArenaParams.MinimumAlignmentShift));
		uint32 PoolIndex = uint32(MemSizeToIndex[Index]);
		checkSlow(PoolIndex >= 0 && PoolIndex < ArenaParams.PoolCount);
		return PoolIndex;
	}
	FORCEINLINE uint32 PoolIndexToBlockSize(uint32 PoolIndex)
	{
		return uint32(SmallBlockSizesReversedShifted[ArenaParams.PoolCount - PoolIndex - 1]) << ArenaParams.MinimumAlignmentShift;
	}
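
	// Illustrative worked example (assumed values): with PoolCount == 50,
	// MinimumAlignmentShift == 4 and PoolIndex == 0, this reads entry 49 of
	// SmallBlockSizesReversedShifted; if that entry is 1, the block size is
	// 1 << 4 == 16 bytes. The table is stored reversed so the smallest, most
	// frequently used sizes end up together on the hottest cache line.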
	void Commit(uint32 InPoolIndex, void* Ptr, SIZE_T Size);
	void Decommit(uint32 InPoolIndex, void* Ptr, SIZE_T Size);

	// Pool tables for the different block sizes
	TArray<FPoolTable> SmallPoolTables;

	uint32 SmallPoolInfosPerPlatformPage;

	PoolHashBucket* HashBuckets;
	PoolHashBucket* HashBucketFreeList;
	uint64 NumLargePoolsPerPage;

	FCriticalSection Mutex;
	FGlobalRecycler GGlobalRecycler;
	FPtrToPoolMapping PtrToPoolMapping;

	FArenaParams ArenaParams;
620 TArray<uint16> SmallBlockSizesReversedShifted;
621 uint32 BinnedGPUTlsSlot;
622 uint64 PoolSearchDiv;
623 uint8* HighestPoolBaseVMPtr;
624 FPlatformMemory::FPlatformVirtualMemoryBlock PoolBaseVMBlock;
625 TArray<uint8*> PoolBaseVMPtr;
626 TArray<FPlatformMemory::FPlatformVirtualMemoryBlock> PoolBaseVMBlocks;
628 TArray<uint8> MemSizeToIndex;
	MBG_STAT(
	int64 BinnedGPUAllocatedSmallPoolMemory = 0;	// small-pool memory requested by the game
	int64 BinnedGPUAllocatedOSSmallPoolMemory = 0;

	int64 BinnedGPUAllocatedLargePoolMemory = 0;	// requests that did not fit in a small pool
	int64 BinnedGPUAllocatedLargePoolMemoryWAlignment = 0;	// as above, including alignment padding at the OS level

	int64 BinnedGPUPoolInfoMemory = 0;
	int64 BinnedGPUHashMemory = 0;
	int64 BinnedGPUFreeBitsMemory = 0;
	int64 BinnedGPUTLSMemory = 0;
	TAtomic<int64> ConsolidatedMemory;
	TAtomic<int64> GPUProxyMemory;
	)
	FCriticalSection FreeBlockListsRegistrationMutex;
	FCriticalSection& GetFreeBlockListsRegistrationMutex()
	{
		return FreeBlockListsRegistrationMutex;
	}
	TArray<FPerThreadFreeBlockLists*> RegisteredFreeBlockLists;
	TArray<FPerThreadFreeBlockLists*>& GetRegisteredFreeBlockLists()
	{
		return RegisteredFreeBlockLists;
	}
	void RegisterThreadFreeBlockLists(FPerThreadFreeBlockLists* FreeBlockLists)
	{
		FScopeLock Lock(&GetFreeBlockListsRegistrationMutex());
		GetRegisteredFreeBlockLists().Add(FreeBlockLists);
	}
	int64 UnregisterThreadFreeBlockLists(FPerThreadFreeBlockLists* FreeBlockLists)
	{
		FScopeLock Lock(&GetFreeBlockListsRegistrationMutex());
		GetRegisteredFreeBlockLists().Remove(FreeBlockLists);
		return FreeBlockLists->AllocatedMemory;
	}

	TArray<void*> MallocedPointers;
};
PRAGMA_RESTORE_UNSAFE_TYPECAST_WARNINGS