5#include "Containers/Array.h"
7#include "HAL/PlatformAtomics.h"
8#include "HAL/PlatformMemory.h"
9#include "Math/UnrealMathUtility.h"
10#include "Templates/Atomic.h"
11#include "Templates/MemoryOps.h"
14#include "HAL/Allocators/CachedOSPageAllocator.h"
15#include "HAL/Allocators/PooledVirtualMemoryAllocator.h"
16#include "HAL/CriticalSection.h"
17#include "HAL/LowLevelMemTracker.h"
18#include "HAL/MallocBinnedCommon.h"
19#include "HAL/MemoryBase.h"
20#include "HAL/PlatformMath.h"
21#include "HAL/PlatformTLS.h"
22#include "HAL/UnrealMemory.h"
23#include "Math/NumericLimits.h"
24#include "Misc/AssertionMacros.h"
25#include "Misc/ScopeLock.h"
26#include "Misc/ScopeLock.h"
27#include "Templates/AlignmentTemplates.h"
// Upper bound on the number of cached bundle slots per pool in FGlobalRecycler.
// The runtime tunable (ArenaParams / GMallocBinnedArenaMaxBundlesBeforeRecycle) is
// clamped against this compile-time constant at the loop sites below.
// FIX: the define was split across two physical lines without a backslash
// continuation, so the macro expanded to an unbalanced "(8" — rejoined onto one line.
#define BINNEDARENA_MAX_GMallocBinnedArenaMaxBundlesBeforeRecycle (8)
34#if COLLECT_BINNEDARENA_STATS
40PRAGMA_DISABLE_UNSAFE_TYPECAST_WARNINGS
// NOTE(review): extraction artifact throughout this chunk — the original file's own
// line numbers are fused into the text (e.g. "44 struct ...") and interior lines
// (braces, some statements, member declarations) are missing. Tokens are preserved
// as-is; comments below describe only what the visible code establishes.
//
// Arena-scoped binned allocator implementing the FMalloc interface; final, so it
// cannot be subclassed.
42class FMallocBinnedArena final :
public FMalloc
// Forward declarations of internal bookkeeping types defined later in the class.
44 struct FGlobalRecycler;
45 struct FPoolInfoLarge;
46 struct FPoolInfoSmall;
48 struct PoolHashBucket;
// FFreeBlock constructor: block size is stored pre-shifted by the arena's minimum
// alignment shift so it fits in a uint16 (BlockSizeShifted below); a canary value is
// written for later corruption detection.
59 FORCEINLINE FFreeBlock(uint32 InPageSize, uint32 InBlockSize, uint32 InPoolIndex, uint8 MinimumAlignmentShift)
60 : BlockSizeShifted(InBlockSize >> MinimumAlignmentShift)
61 , PoolIndex(InPoolIndex)
62 , Canary(CANARY_VALUE)
// Whole page carved into equal blocks; count of free blocks starts at capacity.
66 NumFreeBlocks = InPageSize / InBlockSize;
// Canary check: true while the header has not been stomped.
75 return Canary == FFreeBlock::CANARY_VALUE;
// Out-of-line failure path invoked when the canary check fails (declaration only).
85 void CanaryFail()
const;
// Hands out the block at index NumFreeBlocks (block size reconstructed by undoing
// the shift). NOTE(review): lines are missing here — presumably NumFreeBlocks is
// decremented before this return in the original; confirm against upstream source.
87 FORCEINLINE void* AllocateRegularBlock(uint8 MinimumAlignmentShift)
90 return (uint8*)
this + NumFreeBlocks * (uint32(BlockSizeShifted) << MinimumAlignmentShift);
// Block size divided by the minimum alignment, so it fits in 16 bits.
93 uint16 BlockSizeShifted;
// Pool-table bookkeeping (fragment — declarations for the enclosing struct's other
// members are not visible in this chunk).
103 uint16 BlocksPerBlockOfBlocks;
104 uint8 PagesPlatformForBlockOfBlocks;
// Bit trees tracking which block-of-blocks are allocated / fully exhausted.
106 FBitTree BlockOfBlockAllocationBits;
107 FBitTree BlockOfBlockIsExhausted;
109 uint32 NumEverUsedBlockOfBlocks;
110 FPoolInfoSmall** PoolInfos;
112 uint64 UnusedAreaOffsetLow;
// Precomputed shifts/masks used to map an arbitrary pointer to its hash bucket and
// large-pool index without division. (Fragment: some member declarations — e.g.
// HashKeyShift, PoolMask — are not visible in this chunk but are clearly assigned
// in Init below.)
115 struct FPtrToPoolMapping
118 : PtrToPoolPageBitShift(0)
124 explicit FPtrToPoolMapping(uint32 InPageSize, uint64 InNumPoolsPerPage, uint64 AddressLimit)
126 Init(InPageSize, InNumPoolsPerPage, AddressLimit);
// Derives all shifts/masks from page size, pools-per-page, and the address-space
// limit. Both inputs are rounded up to powers of two via CeilLogTwo.
129 void Init(uint32 InPageSize, uint64 InNumPoolsPerPage, uint64 AddressLimit)
131 uint64 PoolPageToPoolBitShift = FPlatformMath::CeilLogTwo(InNumPoolsPerPage);
133 PtrToPoolPageBitShift = FPlatformMath::CeilLogTwo(InPageSize);
// Hash key = pointer stripped of both the in-page and pool-in-page bits.
134 HashKeyShift = PtrToPoolPageBitShift + PoolPageToPoolBitShift;
135 PoolMask = (1ull << PoolPageToPoolBitShift) - 1;
// Number of buckets covering the whole address limit; used as a power-of-two
// modulus below, so AddressLimit >> HashKeyShift is expected to be a power of two.
136 MaxHashBuckets = AddressLimit >> HashKeyShift;
// Splits a pointer into (bucket index, full collision key, pool index) in one pass.
139 FORCEINLINE void GetHashBucketAndPoolIndices(
const void* InPtr, uint32& OutBucketIndex, UPTRINT& OutBucketCollision, uint32& OutPoolIndex)
const
141 OutBucketCollision = (UPTRINT)InPtr >> HashKeyShift;
// Power-of-two modulus — relies on MaxHashBuckets being a power of two.
142 OutBucketIndex = uint32(OutBucketCollision & (MaxHashBuckets - 1));
143 OutPoolIndex = ((UPTRINT)InPtr >> PtrToPoolPageBitShift) & PoolMask;
148 return MaxHashBuckets;
153 uint64 PtrToPoolPageBitShift;
159 uint64 MaxHashBuckets;
// Intrusive free-list node: freed blocks are reused as FBundleNode storage.
// NextNodeInCurrentBundle chains blocks within one bundle; NextBundle chains whole
// bundles when nodes are handed to the global recycler.
164 FBundleNode* NextNodeInCurrentBundle;
167 FBundleNode* NextBundle;
// FBundle::PushHead fragment — new node becomes the head of the current bundle.
187 Node->NextNodeInCurrentBundle = Head;
188 Node->NextBundle =
nullptr;
// FBundle::PopHead fragment — unlinks and returns the current head.
195 FBundleNode* Result = Head;
198 Head = Head->NextNodeInCurrentBundle;
// Per-pool pair of bundles (partial + full) making up one thread-local free list.
206 struct FFreeBlockList
// Pushes a freed block; returns status (return lines not visible in this chunk).
// Uses branchless '|' instead of '||' deliberately — both operands are cheap.
209 FORCEINLINE bool PushToFront(
void* InPtr, uint32 InPoolIndex, uint32 InBlockSize,
const FArenaParams& LocalArenaParams)
// Partial bundle is full (by count or by byte size): promote it to FullBundle.
// NOTE(review): the original presumably bails/returns false when FullBundle is
// already occupied — that line is not visible here.
213 if ((PartialBundle.Count >= (uint32)LocalArenaParams.MaxBlocksPerBundle) | (PartialBundle.Count * InBlockSize >= (uint32)LocalArenaParams.MaxSizePerBundle))
219 FullBundle = PartialBundle;
220 PartialBundle.Reset();
222 PartialBundle.PushHead((FBundleNode*)InPtr);
// True if a push would succeed without displacing an existing full bundle.
225 FORCEINLINE bool CanPushToFront(uint32 InPoolIndex, uint32 InBlockSize,
const FArenaParams& LocalArenaParams)
227 return !((!!FullBundle.Head) & ((PartialBundle.Count >= (uint32)LocalArenaParams.MaxBlocksPerBundle) | (PartialBundle.Count * InBlockSize >= (uint32)LocalArenaParams.MaxSizePerBundle)));
// PopFromFront fragment: refill the partial bundle from the full one when empty.
231 if ((!PartialBundle.Head) & (!!FullBundle.Head))
233 PartialBundle = FullBundle;
236 return PartialBundle.Head ? PartialBundle.PopHead() :
nullptr;
// "RecyleFull" spelling matches the upstream API name; do not "fix" without
// renaming all call sites (see RecycleFullBundle below).
240 FBundleNode* RecyleFull(FArenaParams& LocalArenaParams, FGlobalRecycler& GGlobalRecycler, uint32 InPoolIndex);
241 bool ObtainPartial(FArenaParams& LocalArenaParams, FGlobalRecycler& GGlobalRecycler, uint32 InPoolIndex);
242 FBundleNode* PopBundles(uint32 InPoolIndex);
244 FBundle PartialBundle;
// Thread-local cache: one FFreeBlockList per pool, stored in TLS so the fast paths
// in Malloc/Free can run without taking the arena mutex.
248 struct FPerThreadFreeBlockLists
// Fetches this thread's cache from the given TLS slot; null when the slot is 0
// (TLS not yet initialized) or nothing is stored for this thread.
250 FORCEINLINE static FPerThreadFreeBlockLists* Get(uint32 BinnedArenaTlsSlot)
252 return BinnedArenaTlsSlot ? (FPerThreadFreeBlockLists*)FPlatformTLS::GetTlsValue(BinnedArenaTlsSlot) :
nullptr;
254 static void SetTLS(FMallocBinnedArena& Allocator);
// Tears down this thread's cache; returns the allocated-memory delta to fold back
// into the arena-wide counters.
255 static int64 ClearTLS(FMallocBinnedArena& Allocator);
257 FPerThreadFreeBlockLists(uint32 PoolCount)
260 FreeLists.AddDefaulted(PoolCount);
// Thin per-pool forwarders into FFreeBlockList — no locking; TLS-private data.
265 return FreeLists[InPoolIndex].PopFromFront(InPoolIndex);
268 FORCEINLINE bool Free(
void* InPtr, uint32 InPoolIndex, uint32 InBlockSize,
const FArenaParams& LocalArenaParams)
270 return FreeLists[InPoolIndex].PushToFront(InPtr, InPoolIndex, InBlockSize, LocalArenaParams);
273 FORCEINLINE bool CanFree(uint32 InPoolIndex, uint32 InBlockSize,
const FArenaParams& LocalArenaParams)
275 return FreeLists[InPoolIndex].CanPushToFront(InPoolIndex, InBlockSize, LocalArenaParams);
278 FBundleNode* RecycleFullBundle(FArenaParams& LocalArenaParams, FGlobalRecycler& GlobalRecycler, uint32 InPoolIndex)
280 return FreeLists[InPoolIndex].RecyleFull(LocalArenaParams, GlobalRecycler, InPoolIndex);
283 bool ObtainRecycledPartial(FArenaParams& LocalArenaParams, FGlobalRecycler& GlobalRecycler, uint32 InPoolIndex)
285 return FreeLists[InPoolIndex].ObtainPartial(LocalArenaParams, GlobalRecycler, InPoolIndex);
287 FBundleNode* PopBundles(uint32 InPoolIndex)
289 return FreeLists[InPoolIndex].PopBundles(InPoolIndex);
// Net bytes this thread's cache accounts for; reconciled on ClearTLS/unregister.
291 int64 AllocatedMemory;
292 TArray<FFreeBlockList> FreeLists;
// Lock-free cross-thread exchange of full bundles: per pool, a small fixed array of
// slots manipulated only via compare-and-swap.
295 struct FGlobalRecycler
297 void Init(uint32 PoolCount)
299 Bundles.AddDefaulted(PoolCount);
// Attempts to park InBundle in an empty slot; scans at most
// min(NumCachedBundles, compile-time max) slots. (Return statements for the
// success/failure paths are not visible in this chunk.)
301 bool PushBundle(uint32 NumCachedBundles, uint32 InPoolIndex, FBundleNode* InBundle)
303 for (uint32 Slot = 0; Slot < NumCachedBundles && Slot < BINNEDARENA_MAX_GMallocBinnedArenaMaxBundlesBeforeRecycle; Slot++)
// Cheap unsynchronized pre-check before the CAS.
305 if (!Bundles[InPoolIndex].FreeBundles[Slot])
// CAS nullptr -> InBundle; success iff the slot was still empty.
307 if (!FPlatformAtomics::InterlockedCompareExchangePointer((
void**)&Bundles[InPoolIndex].FreeBundles[Slot], InBundle,
nullptr))
// Attempts to steal a cached bundle from any non-empty slot (CAS Result -> null).
316 FBundleNode* PopBundle(uint32 NumCachedBundles, uint32 InPoolIndex)
318 for (uint32 Slot = 0; Slot < NumCachedBundles && Slot < BINNEDARENA_MAX_GMallocBinnedArenaMaxBundlesBeforeRecycle; Slot++)
320 FBundleNode* Result = Bundles[InPoolIndex].FreeBundles[Slot];
323 if (FPlatformAtomics::InterlockedCompareExchangePointer((
void**)&Bundles[InPoolIndex].FreeBundles[Slot],
nullptr, Result) == Result)
// One slot array per pool; "Padded" suggests cache-line padding to avoid false
// sharing, but the padding members are not visible in this chunk — confirm upstream.
333 struct FPaddedBundlePointer
335 FBundleNode* FreeBundles[BINNEDARENA_MAX_GMallocBinnedArenaMaxBundlesBeforeRecycle];
336 FPaddedBundlePointer()
// Zero-initializes all slot pointers.
338 DefaultConstructItems<FBundleNode*>(FreeBundles, BINNEDARENA_MAX_GMallocBinnedArenaMaxBundlesBeforeRecycle);
341 TArray<FPaddedBundlePointer> Bundles;
// Maps a pointer back to its small-pool index. Returns a value >= PoolCount for
// pointers outside the arena's VM range (callers treat that as "OS allocation").
// NOTE(review): the bodies below are fragments — the 'do {' openers for both
// correction loops and several braces are missing from this chunk; the control
// flow must be read alongside the upstream source.
345 FORCEINLINE uint64 PoolIndexFromPtr(
const void* Ptr)
// Fast path: pools are laid out contiguously at fixed stride, so the index is a
// pure shift off the base of pool 0.
347 if (PoolSearchDiv == 0)
349 return (UPTRINT(Ptr) - UPTRINT(PoolBaseVMPtr[0])) >> ArenaParams.MaxMemoryPerBlockSizeShift;
351 uint64 PoolIndex = ArenaParams.PoolCount;
// Range gate before the search; '&' instead of '&&' is deliberate (branchless).
352 if (((uint8*)Ptr >= PoolBaseVMPtr[0]) & ((uint8*)Ptr < HighestPoolBaseVMPtr + ArenaParams.MaxMemoryPerBlockSize))
// Initial estimate by division, then walk down/up until the pointer falls inside
// the estimated pool's [base, base + MaxMemoryPerBlockSize) window.
354 PoolIndex = uint64((uint8*)Ptr - PoolBaseVMPtr[0]) / PoolSearchDiv;
355 if (PoolIndex >= ArenaParams.PoolCount)
357 PoolIndex = ArenaParams.PoolCount - 1;
// Estimate too high: step down while Ptr is below the candidate pool's base.
359 if ((uint8*)Ptr < PoolBaseVMPtr[(int32)PoolIndex])
364 check(PoolIndex < ArenaParams.PoolCount);
365 }
while ((uint8*)Ptr < PoolBaseVMPtr[(int32)PoolIndex]);
// After stepping down, a pointer past the pool's end means it is in a gap
// between pools — classify as outside the arena.
366 if ((uint8*)Ptr >= PoolBaseVMPtr[(int32)PoolIndex] + ArenaParams.MaxMemoryPerBlockSize)
368 PoolIndex = ArenaParams.PoolCount;
// Estimate too low: step up while Ptr is past the candidate pool's end.
371 else if ((uint8*)Ptr >= PoolBaseVMPtr[(int32)PoolIndex] + ArenaParams.MaxMemoryPerBlockSize)
376 check(PoolIndex < ArenaParams.PoolCount);
377 }
while ((uint8*)Ptr >= PoolBaseVMPtr[(int32)PoolIndex] + ArenaParams.MaxMemoryPerBlockSize);
378 if ((uint8*)Ptr < PoolBaseVMPtr[(int32)PoolIndex])
380 PoolIndex = ArenaParams.PoolCount;
// PoolBasePtr fragment: base VM address of a pool by index.
389 return PoolBaseVMPtr[InPoolIndex];
// Same lookup, but asserts the pointer really belongs to this arena.
391 FORCEINLINE uint64 PoolIndexFromPtrChecked(
const void* Ptr)
393 uint64 Result = PoolIndexFromPtr(Ptr);
394 check(Result < ArenaParams.PoolCount);
// IsOSAllocation fragment: anything not mapping to a small pool came from the OS
// (large allocation) path.
400 return PoolIndexFromPtr(Ptr) >= ArenaParams.PoolCount;
// Rounds an interior pointer down to the start of its containing "block of blocks"
// and reports that block's index within the pool.
404 FORCEINLINE void* BlockOfBlocksPointerFromContainedPtr(
const void* Ptr, uint8 PagesPlatformForBlockOfBlocks, uint32& OutBlockOfBlocksIndex)
406 uint32 PoolIndex = PoolIndexFromPtrChecked(Ptr);
407 uint8* PoolStart = PoolBasePtr(PoolIndex);
// Stride = pages-per-block-of-blocks * allocation granularity (all UPTRINT math
// to avoid 32-bit overflow on large offsets).
408 uint64 BlockOfBlocksIndex = (UPTRINT(Ptr) - UPTRINT(PoolStart)) / (UPTRINT(PagesPlatformForBlockOfBlocks) * UPTRINT(ArenaParams.AllocationGranularity));
409 OutBlockOfBlocksIndex = BlockOfBlocksIndex;
411 uint8* Result = PoolStart + BlockOfBlocksIndex * UPTRINT(PagesPlatformForBlockOfBlocks) * UPTRINT(ArenaParams.AllocationGranularity);
413 check(Result < PoolStart + ArenaParams.MaxMemoryPerBlockSize);
// Inverse mapping: (pool, block-of-blocks index) -> address. "Indecies" spelling
// matches the upstream API name; renaming would break external callers.
416 FORCEINLINE uint8* BlockPointerFromIndecies(uint32 InPoolIndex, uint32 BlockOfBlocksIndex, uint32 BlockOfBlocksSize)
418 uint8* PoolStart = PoolBasePtr(InPoolIndex);
419 uint8* Ptr = PoolStart + BlockOfBlocksIndex * uint64(BlockOfBlocksSize);
420 check(Ptr + BlockOfBlocksSize <= PoolStart + ArenaParams.MaxMemoryPerBlockSize);
423 FPoolInfoSmall* PushNewPoolToFront(FMallocBinnedArena& Allocator, uint32 InBlockSize, uint32 InPoolIndex, uint32& OutBlockOfBlocksIndex);
424 FPoolInfoSmall* GetFrontPool(FPoolTable& Table, uint32 InPoolIndex, uint32& OutBlockOfBlocksIndex);
// Decides whether a (size, alignment) request can be served by a small-block pool,
// possibly bumping InOutSize to a block size that satisfies the alignment.
// Returns true when the small path applies (true/false returns not all visible).
426 FORCEINLINE bool AdjustSmallBlockSizeForAlignment(SIZE_T& InOutSize, uint32 Alignment)
// Trivial case: minimum alignment already covers the request.
428 if ((InOutSize <= ArenaParams.MaxPoolSize) & (Alignment <= ArenaParams.MinimumAlignment))
432 SIZE_T AlignedSize = Align(InOutSize, Alignment);
// "bAttemptToAlignSmallBocks" spelling matches the upstream parameter name.
433 if (ArenaParams.bAttemptToAlignSmallBocks & (AlignedSize <= ArenaParams.MaxPoolSize) & (Alignment <= ArenaParams.MaximumAlignmentForSmallBlock))
435 uint32 PoolIndex = BoundSizeToPoolIndex(AlignedSize);
// NOTE(review): lines missing here — upstream walks PoolIndex upward looking for
// a block size that is naturally aligned; confirm the loop against source.
438 uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
439 if (IsAligned(BlockSize, Alignment))
441 InOutSize = SIZE_T(BlockSize);
445 check(PoolIndex < ArenaParams.PoolCount);
454 FMallocBinnedArena();
// Mutable access to the arena configuration; intended for setup before
// InitMallocBinned() (body not visible in this chunk).
455 FArenaParams& GetParams()
459 void InitMallocBinned();
461 virtual ~FMallocBinnedArena();
465 virtual bool IsInternallyThreadSafe()
const override;
// FMalloc interface — Malloc fast path: serve small requests from the per-thread
// TLS cache without locking; fall back to MallocExternal otherwise.
466 FORCEINLINE virtual void* Malloc(SIZE_T Size, uint32 Alignment) override
468 Alignment = FMath::Max<uint32>(Alignment, ArenaParams.MinimumAlignment);
470 void* Result =
nullptr;
// Small-block path applies (Size possibly bumped to a suitable block size).
474 if (AdjustSmallBlockSizeForAlignment(Size, Alignment))
476 FPerThreadFreeBlockLists* Lists = ArenaParams.bPerThreadCaches ? FPerThreadFreeBlockLists::Get(BinnedArenaTlsSlot) :
nullptr;
479 uint32 PoolIndex = BoundSizeToPoolIndex(Size);
480 uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
481 Result = Lists->Malloc(PoolIndex);
// Per-thread byte accounting; whole blocks, not the requested size.
484 Lists->AllocatedMemory += BlockSize;
// Cache miss or large allocation: take the locked external slow path.
489 if (Result ==
nullptr)
491 Result = MallocExternal(Size, Alignment);
// Realloc fast path: if the new size still fits the current block (and alignment),
// keep the pointer; else malloc-copy-free entirely within the TLS cache if possible.
496 FORCEINLINE virtual void* Realloc(
void* Ptr, SIZE_T NewSize, uint32 Alignment) override
498 Alignment = FMath::Max<uint32>(Alignment, ArenaParams.MinimumAlignment);
499 if (AdjustSmallBlockSizeForAlignment(NewSize, Alignment))
501 FPerThreadFreeBlockLists* Lists = ArenaParams.bPerThreadCaches ? FPerThreadFreeBlockLists::Get(BinnedArenaTlsSlot) :
nullptr;
503 uint64 PoolIndex = PoolIndexFromPtr(Ptr);
// Branchless guard: need a TLS cache, and Ptr must be null or a small-pool block.
504 if ((!!Lists) & ((!Ptr) | (PoolIndex < ArenaParams.PoolCount)))
506 uint32 BlockSize = 0;
508 bool bCanFree =
true;
512 BlockSize = PoolIndexToBlockSize(PoolIndex);
// In-place success: new size fits this block, alignment holds, and the size has
// not shrunk below the previous pool's block size (which would waste memory).
513 if ((!!NewSize) & (NewSize <= BlockSize) & IsAligned(BlockSize, Alignment) & ((!PoolIndex) | (NewSize > PoolIndexToBlockSize(PoolIndex - 1))))
517 bCanFree = Lists->CanFree(PoolIndex, BlockSize, ArenaParams);
521 uint32 NewPoolIndex = BoundSizeToPoolIndex(NewSize);
522 uint32 NewBlockSize = PoolIndexToBlockSize(NewPoolIndex);
523 void* Result = NewSize ? Lists->Malloc(NewPoolIndex) :
nullptr;
526 Lists->AllocatedMemory += NewBlockSize;
528 if (Result || !NewSize)
// Copy the smaller of old/new payload sizes before releasing the old block.
532 FMemory::Memcpy(Result, Ptr, FPlatformMath::Min<SIZE_T>(NewSize, BlockSize));
536 bool bDidPush = Lists->Free(Ptr, PoolIndex, BlockSize, ArenaParams);
538 Lists->AllocatedMemory -= BlockSize;
// Slow path for everything else.
546 void* Result = ReallocExternal(Ptr, NewSize, Alignment);
// Free fast path fragment: push small blocks onto the TLS cache; the enclosing
// function signature and FreeExternal fallback are not visible in this chunk.
552 uint64 PoolIndex = PoolIndexFromPtr(Ptr);
553 if (PoolIndex < ArenaParams.PoolCount)
555 FPerThreadFreeBlockLists* Lists = ArenaParams.bPerThreadCaches ? FPerThreadFreeBlockLists::Get(BinnedArenaTlsSlot) :
nullptr;
558 int32 BlockSize = PoolIndexToBlockSize(PoolIndex);
559 if (Lists->Free(Ptr, PoolIndex, BlockSize, ArenaParams))
561 Lists->AllocatedMemory -= BlockSize;
// Small-pool sizes are answered from the pool table; everything else defers to the
// external (hash-table) lookup.
568 FORCEINLINE virtual bool GetAllocationSize(
void *Ptr, SIZE_T &SizeOut) override
570 uint64 PoolIndex = PoolIndexFromPtr(Ptr);
571 if (PoolIndex < ArenaParams.PoolCount)
573 SizeOut = PoolIndexToBlockSize(PoolIndex);
576 return GetAllocationSizeExternal(Ptr, SizeOut);
// Reports the size a request would actually consume (block size for small
// requests, granularity-aligned size for large ones).
579 FORCEINLINE virtual SIZE_T QuantizeSize(SIZE_T Count, uint32 Alignment) override
581 check(DEFAULT_ALIGNMENT <= ArenaParams.MinimumAlignment);
// Alignment must be a power of two.
582 checkSlow((Alignment & (Alignment - 1)) == 0);
584 if ((Count <= ArenaParams.MaxPoolSize) & (Alignment <= ArenaParams.MinimumAlignment))
586 SizeOut = PoolIndexToBlockSize(BoundSizeToPoolIndex(Count));
590 Alignment = FPlatformMath::Max<uint32>(Alignment, ArenaParams.AllocationGranularity);
591 SizeOut = Align(Count, Alignment);
593 check(SizeOut >= Count);
// Remaining FMalloc overrides (defined out of line).
597 virtual bool ValidateHeap() override;
598 virtual void Trim(
bool bTrimThreadCaches) override;
599 virtual void SetupTLSCachesOnCurrentThread() override;
600 virtual void ClearAndDisableTLSCachesOnCurrentThread() override;
601 virtual const TCHAR* GetDescriptiveName() override;
// Locked slow paths backing the inline fast paths above.
604 void FlushCurrentThreadCache();
605 void* MallocExternal(SIZE_T Size, uint32 Alignment);
606 void* ReallocExternal(
void* Ptr, SIZE_T NewSize, uint32 Alignment);
607 void FreeExternal(
void *Ptr);
608 bool GetAllocationSizeExternal(
void* Ptr, SIZE_T& SizeOut);
// Stats helper compiled in only when MBA_STAT is enabled.
610 MBA_STAT(int64 GetTotalAllocatedSmallPoolMemory();)
611 virtual void GetAllocatorStats(FGenericMemoryStats& out_Stats) override;
613 virtual void DumpAllocatorStats(
class FOutputDevice& Ar) override;
// Maps a byte size to its pool index via the MemSizeToIndex table; index math is
// done in units of MinimumAlignment.
615 FORCEINLINE uint32 BoundSizeToPoolIndex(SIZE_T Size)
617 auto Index = ((Size + ArenaParams.MinimumAlignment - 1) >> ArenaParams.MinimumAlignmentShift);
618 checkSlow(Index >= 0 && Index <= (ArenaParams.MaxPoolSize >> ArenaParams.MinimumAlignmentShift));
619 uint32 PoolIndex = uint32(MemSizeToIndex[Index]);
620 checkSlow(PoolIndex >= 0 && PoolIndex < ArenaParams.PoolCount);
// Inverse of BoundSizeToPoolIndex: block sizes are stored reversed and pre-shifted,
// hence the (PoolCount - PoolIndex - 1) indexing and the shift back up.
623 FORCEINLINE uint32 PoolIndexToBlockSize(uint32 PoolIndex)
625 return uint32(SmallBlockSizesReversedShifted[ArenaParams.PoolCount - PoolIndex - 1]) << ArenaParams.MinimumAlignmentShift;
// Commit/decommit physical pages for a pool's VM range.
628 void Commit(uint32 InPoolIndex,
void *Ptr, SIZE_T Size);
629 void Decommit(uint32 InPoolIndex,
void *Ptr, SIZE_T Size);
// One pool table per small block size.
633 TArray<FPoolTable> SmallPoolTables;
635 uint32 SmallPoolInfosPerPlatformPage;
// Hash table mapping large-allocation pages to FPoolInfoLarge records.
637 PoolHashBucket* HashBuckets;
638 PoolHashBucket* HashBucketFreeList;
639 uint64 NumLargePoolsPerPage;
// Arena-wide lock guarding the external (slow) paths.
641 FCriticalSection Mutex;
642 FGlobalRecycler GGlobalRecycler;
643 FPtrToPoolMapping PtrToPoolMapping;
645 FArenaParams ArenaParams;
// Block sizes stored reversed and pre-shifted — see PoolIndexToBlockSize.
647 TArray<uint16> SmallBlockSizesReversedShifted;
648 uint32 BinnedArenaTlsSlot;
// Divisor for the pool-index estimate in PoolIndexFromPtr; 0 selects the
// fixed-stride fast path there.
649 uint64 PoolSearchDiv;
650 uint8* HighestPoolBaseVMPtr;
651 FPlatformMemory::FPlatformVirtualMemoryBlock PoolBaseVMBlock;
652 TArray<uint8*> PoolBaseVMPtr;
653 TArray<FPlatformMemory::FPlatformVirtualMemoryBlock> PoolBaseVMBlocks;
655 TArray<uint8> MemSizeToIndex;
// Stat counters (int64; updated on the locked paths — the TLS caches keep their
// own AllocatedMemory and reconcile separately).
658 int64 BinnedArenaAllocatedSmallPoolMemory = 0;
659 int64 BinnedArenaAllocatedOSSmallPoolMemory = 0;
661 int64 BinnedArenaAllocatedLargePoolMemory = 0;
662 int64 BinnedArenaAllocatedLargePoolMemoryWAlignment = 0;
664 int64 BinnedArenaPoolInfoMemory = 0;
665 int64 BinnedArenaHashMemory = 0;
666 int64 BinnedArenaFreeBitsMemory = 0;
667 int64 BinnedArenaTLSMemory = 0;
668 TAtomic<int64> ConsolidatedMemory;
// Registry of all live per-thread caches so Trim/stats can walk them; guarded by
// its own mutex, separate from the main arena Mutex.
671 FCriticalSection FreeBlockListsRegistrationMutex;
672 FCriticalSection& GetFreeBlockListsRegistrationMutex()
674 return FreeBlockListsRegistrationMutex;
676 TArray<FPerThreadFreeBlockLists*> RegisteredFreeBlockLists;
677 TArray<FPerThreadFreeBlockLists*>& GetRegisteredFreeBlockLists()
679 return RegisteredFreeBlockLists;
681 void RegisterThreadFreeBlockLists(FPerThreadFreeBlockLists* FreeBlockLists)
683 FScopeLock Lock(&GetFreeBlockListsRegistrationMutex());
684 GetRegisteredFreeBlockLists().Add(FreeBlockLists);
// Returns the departing thread's outstanding byte count so the caller can fold it
// into the arena totals.
686 int64 UnregisterThreadFreeBlockLists(FPerThreadFreeBlockLists* FreeBlockLists)
688 FScopeLock Lock(&GetFreeBlockListsRegistrationMutex());
689 int64 Result = FreeBlockLists->AllocatedMemory;
691 GetRegisteredFreeBlockLists().Remove(FreeBlockLists);
// Bookkeeping allocations owned by the arena itself.
695 TArray<
void*> MallocedPointers;
698PRAGMA_RESTORE_UNSAFE_TYPECAST_WARNINGS
#define UE_BUILD_SHIPPING