MallocBinned3.h
// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "CoreTypes.h"

#if PLATFORM_HAS_FPlatformVirtualMemoryBlock
#include "HAL/Allocators/CachedOSPageAllocator.h"
#include "HAL/Allocators/PooledVirtualMemoryAllocator.h"
#include "HAL/CriticalSection.h"
#include "HAL/LowLevelMemTracker.h"
#include "HAL/MallocBinnedCommon.h"
#include "HAL/MemoryBase.h"
#include "HAL/PlatformMath.h"
#include "HAL/PlatformMemory.h"
#include "HAL/PlatformTLS.h"
#include "HAL/UnrealMemory.h"
#include "Math/NumericLimits.h"
#include "Misc/AssertionMacros.h"
#include "Templates/AlignmentTemplates.h"
#include "Templates/Atomic.h"

struct FGenericMemoryStats;

#define USE_CACHED_PAGE_ALLOCATOR_FOR_LARGE_ALLOCS (0)

#ifndef USE_512MB_MAX_MEMORY_PER_BLOCK_SIZE
#define USE_512MB_MAX_MEMORY_PER_BLOCK_SIZE 0
#endif

#define BINNED3_BASE_PAGE_SIZE 4096 // Minimum "page size" for binned3
#define BINNED3_MINIMUM_ALIGNMENT_SHIFT 4 // Alignment of blocks, expressed as a shift
#define BINNED3_MINIMUM_ALIGNMENT 16 // Alignment of blocks

#ifndef BINNED3_MAX_SMALL_POOL_ALIGNMENT
#define BINNED3_MAX_SMALL_POOL_ALIGNMENT 128
#endif


#ifndef BINNED3_MAX_SMALL_POOL_SIZE
#if USE_CACHED_PAGE_ALLOCATOR_FOR_LARGE_ALLOCS
#define BINNED3_MAX_SMALL_POOL_SIZE (BINNEDCOMMON_MAX_LISTED_SMALL_POOL_SIZE) // Maximum medium block size
#else
#define BINNED3_MAX_SMALL_POOL_SIZE (128 * 1024) // Maximum medium block size
#endif
#endif
#define BINNED3_SMALL_POOL_COUNT (BINNEDCOMMON_NUM_LISTED_SMALL_POOLS + (BINNED3_MAX_SMALL_POOL_SIZE - BINNEDCOMMON_MAX_LISTED_SMALL_POOL_SIZE) / BINNED3_BASE_PAGE_SIZE)
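// Worked example of the bin count (illustrative values only; the BINNEDCOMMON_* constants
// live in HAL/MallocBinnedCommon.h): if the common header listed 49 explicit bins topping
// out at 28672 bytes, then with the default 128 KB cap this would evaluate to
//   49 + (131072 - 28672) / 4096 = 49 + 25 = 74 small pools,
// i.e. one extra bin per 4 KB base page above the largest listed size.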

#if USE_512MB_MAX_MEMORY_PER_BLOCK_SIZE
#define MAX_MEMORY_PER_BLOCK_SIZE_SHIFT (29) // maximum of 512MB per block size
#else
#define MAX_MEMORY_PER_BLOCK_SIZE_SHIFT (30) // maximum of 1GB per block size
#endif

#define MAX_MEMORY_PER_BLOCK_SIZE (1ull << MAX_MEMORY_PER_BLOCK_SIZE_SHIFT)
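// Each small-block bin reserves MAX_MEMORY_PER_BLOCK_SIZE of virtual address space up
// front, so the shift doubles as the pointer-to-bin divisor (see PoolIndexFromPtr below).
// The total reservation is BINNED3_SMALL_POOL_COUNT x 1 GB (or 512 MB), which is cheap on
// 64-bit targets because pages are only committed as bins actually fill (see Commit/Decommit).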

// This choice depends on how efficient the OS is with sparse commits in large VM blocks
#if !defined(BINNED3_USE_SEPARATE_VM_PER_POOL)
	#if PLATFORM_WINDOWS
		#define BINNED3_USE_SEPARATE_VM_PER_POOL (1)
	#else
		#define BINNED3_USE_SEPARATE_VM_PER_POOL (0)
	#endif
#endif

#define DEFAULT_GMallocBinned3PerThreadCaches 1
#define DEFAULT_GMallocBinned3BundleCount 64
#define DEFAULT_GMallocBinned3AllocExtra 32
#define BINNED3_MAX_GMallocBinned3MaxBundlesBeforeRecycle 8

#if !defined(AGGRESSIVE_MEMORY_SAVING)
	#error "AGGRESSIVE_MEMORY_SAVING must be defined"
#endif
#if AGGRESSIVE_MEMORY_SAVING
	#define DEFAULT_GMallocBinned3BundleSize 8192
#else
	#define DEFAULT_GMallocBinned3BundleSize 65536
#endif


#define BINNED3_ALLOW_RUNTIME_TWEAKING 0
#if BINNED3_ALLOW_RUNTIME_TWEAKING
	extern int32 GMallocBinned3PerThreadCaches;
	extern int32 GMallocBinned3BundleSize = DEFAULT_GMallocBinned3BundleSize;
	extern int32 GMallocBinned3BundleCount = DEFAULT_GMallocBinned3BundleCount;
	extern int32 GMallocBinned3MaxBundlesBeforeRecycle = BINNED3_MAX_GMallocBinned3MaxBundlesBeforeRecycle;
	extern int32 GMallocBinned3AllocExtra = DEFAULT_GMallocBinned3AllocExtra;
#else
	#define GMallocBinned3PerThreadCaches DEFAULT_GMallocBinned3PerThreadCaches
	#define GMallocBinned3BundleSize DEFAULT_GMallocBinned3BundleSize
	#define GMallocBinned3BundleCount DEFAULT_GMallocBinned3BundleCount
	#define GMallocBinned3MaxBundlesBeforeRecycle BINNED3_MAX_GMallocBinned3MaxBundlesBeforeRecycle
	#define GMallocBinned3AllocExtra DEFAULT_GMallocBinned3AllocExtra
#endif


#ifndef BINNED3_ALLOCATOR_STATS
	#if UE_BUILD_SHIPPING && !WITH_EDITOR
		#define BINNED3_ALLOCATOR_STATS 0
	#else
		#define BINNED3_ALLOCATOR_STATS 1
	#endif
#endif


#if BINNED3_ALLOCATOR_STATS
	#if !(UE_BUILD_SHIPPING || UE_BUILD_TEST)
		#define BINNED3_ALLOCATOR_PER_BIN_STATS 1
	#else
		#define BINNED3_ALLOCATOR_PER_BIN_STATS 0
	#endif
#else
	#define BINNED3_ALLOCATOR_PER_BIN_STATS 0
#endif

PRAGMA_DISABLE_UNSAFE_TYPECAST_WARNINGS

//
// Optimized virtual memory allocator.
//
class FMallocBinned3 : public FMalloc
{
	// Forward declares.
	struct FPoolInfoLarge;
	struct FPoolInfoSmall;
	struct FPoolTable;
	struct PoolHashBucket;
	struct Private;


	/** Information about a piece of free memory. */
	struct FFreeBlock
	{
		enum
		{
			CANARY_VALUE = 0xe7
		};

		FORCEINLINE FFreeBlock(uint32 InPageSize, uint32 InBlockSize, uint8 InPoolIndex)
			: BlockSizeShifted(InBlockSize >> BINNED3_MINIMUM_ALIGNMENT_SHIFT)
			, PoolIndex(InPoolIndex)
			, Canary(CANARY_VALUE)
			, NextFreeIndex(MAX_uint32)
		{
			check(InPoolIndex < MAX_uint8 && (InBlockSize >> BINNED3_MINIMUM_ALIGNMENT_SHIFT) <= MAX_uint16);
			NumFreeBlocks = InPageSize / InBlockSize;
		}

		FORCEINLINE uint32 GetNumFreeRegularBlocks() const
		{
			return NumFreeBlocks;
		}
		FORCEINLINE bool IsCanaryOk() const
		{
			return Canary == FFreeBlock::CANARY_VALUE;
		}

		FORCEINLINE void CanaryTest() const
		{
			if (!IsCanaryOk())
			{
				CanaryFail();
			}
			//checkSlow(PoolIndex == BoundSizeToPoolIndex(BlockSize));
		}
		void CanaryFail() const;

		FORCEINLINE void* AllocateRegularBlock()
		{
			--NumFreeBlocks;
			return (uint8*)this + NumFreeBlocks * (uint32(BlockSizeShifted) << BINNED3_MINIMUM_ALIGNMENT_SHIFT);
		}

		uint16 BlockSizeShifted; // Size of the blocks that this list points to >> BINNED3_MINIMUM_ALIGNMENT_SHIFT
		uint8 PoolIndex; // Index of this pool
		uint8 Canary; // Constant value of 0xe7
		uint32 NumFreeBlocks; // Number of consecutive free blocks here, at least 1.
		uint32 NextFreeIndex; // Next free block or MAX_uint32
	};
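	// How a free run is carved up, as the code above shows: the FFreeBlock header lives in
	// the lowest block of a run of free blocks, and AllocateRegularBlock() hands blocks out
	// from the top of the run downward, so the block holding the header is the last one to
	// be reused. The canary byte catches writes through stale or out-of-bounds pointers
	// before the header is trusted.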

	/** Pool table. */
	struct FPoolTable
	{
		uint32 BlockSize;
		uint16 BlocksPerBlockOfBlocks;
		uint8 PagesPlatformForBlockOfBlocks;

		FBitTree BlockOfBlockAllocationBits; // a set bit here means the virtual memory is committed
		FBitTree BlockOfBlockIsExhausted; // a set bit here means the pool is completely full

		uint32 NumEverUsedBlockOfBlocks;
		FPoolInfoSmall** PoolInfos;

		uint64 UnusedAreaOffsetLow;

#if BINNED3_ALLOCATOR_PER_BIN_STATS
		// these are "head end" stats, above the TLS cache
		TAtomic<int64> TotalRequestedAllocSize;
		TAtomic<int64> TotalAllocCount;
		TAtomic<int64> TotalFreeCount;

		FORCEINLINE void HeadEndAlloc(SIZE_T Size)
		{
			check(Size >= 0 && Size <= BlockSize);
			TotalRequestedAllocSize += Size;
			TotalAllocCount++;
		}
		FORCEINLINE void HeadEndFree()
		{
			TotalFreeCount++;
		}
#else
		FORCEINLINE void HeadEndAlloc(SIZE_T Size)
		{
		}
		FORCEINLINE void HeadEndFree()
		{
		}
#endif
	};

	struct FPtrToPoolMapping
	{
		FPtrToPoolMapping()
			: PtrToPoolPageBitShift(0)
			, HashKeyShift(0)
			, PoolMask(0)
			, MaxHashBuckets(0)
		{
		}
		explicit FPtrToPoolMapping(uint32 InPageSize, uint64 InNumPoolsPerPage, uint64 AddressLimit)
		{
			Init(InPageSize, InNumPoolsPerPage, AddressLimit);
		}

		void Init(uint32 InPageSize, uint64 InNumPoolsPerPage, uint64 AddressLimit)
		{
			uint64 PoolPageToPoolBitShift = FPlatformMath::CeilLogTwo(InNumPoolsPerPage);

			PtrToPoolPageBitShift = FPlatformMath::CeilLogTwo(InPageSize);
			HashKeyShift = PtrToPoolPageBitShift + PoolPageToPoolBitShift;
			PoolMask = (1ull << PoolPageToPoolBitShift) - 1;
			MaxHashBuckets = AddressLimit >> HashKeyShift;
		}

		FORCEINLINE void GetHashBucketAndPoolIndices(const void* InPtr, uint32& OutBucketIndex, UPTRINT& OutBucketCollision, uint32& OutPoolIndex) const
		{
			OutBucketCollision = (UPTRINT)InPtr >> HashKeyShift;
			OutBucketIndex = uint32(OutBucketCollision & (MaxHashBuckets - 1));
			OutPoolIndex = ((UPTRINT)InPtr >> PtrToPoolPageBitShift) & PoolMask;
		}

		FORCEINLINE uint64 GetMaxHashBuckets() const
		{
			return MaxHashBuckets;
		}

	private:
		/** Shift to apply to a pointer to get the reference from the indirect tables */
		uint64 PtrToPoolPageBitShift;

		/** Shift required to get the hash table key from a pointer. */
		uint64 HashKeyShift;

		/** Used to mask off the bits that have been used to look up the indirect table */
		uint64 PoolMask;

		// PageSize dependent constants
		uint64 MaxHashBuckets;
	};
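	// Worked example of the shift math (illustrative inputs): Init(65536, 4, AddressLimit)
	// gives PtrToPoolPageBitShift == 16 (CeilLogTwo of the page size), PoolPageToPoolBitShift
	// == 2, hence HashKeyShift == 18 and PoolMask == 0x3; a pointer's hash key is then its
	// address >> 18, and the pool-within-page index is bits 16..17, with AddressLimit >> 18
	// possible hash buckets in total.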

	FPtrToPoolMapping PtrToPoolMapping;

	// Pool tables for different pool sizes
	FPoolTable SmallPoolTables[BINNED3_SMALL_POOL_COUNT];

	uint32 SmallPoolInfosPerPlatformPage;

	PoolHashBucket* HashBuckets;
	PoolHashBucket* HashBucketFreeList;
	uint64 NumLargePoolsPerPage;

	FCriticalSection Mutex;

	struct FBundleNode
	{
		FBundleNode* NextNodeInCurrentBundle;
		union
		{
			FBundleNode* NextBundle;
			int32 Count;
		};
	};

	struct FBundle
	{
		FORCEINLINE FBundle()
		{
			Reset();
		}

		FORCEINLINE void Reset()
		{
			Head = nullptr;
			Count = 0;
		}

		FORCEINLINE void PushHead(FBundleNode* Node)
		{
			Node->NextNodeInCurrentBundle = Head;
			Node->NextBundle = nullptr;
			Head = Node;
			Count++;
		}

		FORCEINLINE FBundleNode* PopHead()
		{
			FBundleNode* Result = Head;

			Count--;
			Head = Head->NextNodeInCurrentBundle;
			return Result;
		}

		FBundleNode* Head;
		uint32 Count;
	};
	static_assert(sizeof(FBundleNode) <= BINNED3_MINIMUM_ALIGNMENT, "Bundle nodes must fit into the smallest block size");
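	// The free lists are intrusive: a freed block's own payload is reinterpreted as an
	// FBundleNode (a pointer plus a pointer/count union, 16 bytes on 64-bit targets), which
	// is why FBundleNode must fit in BINNED3_MINIMUM_ALIGNMENT bytes per the static_assert
	// above. PopHead() deliberately skips a null check; callers test Head before popping.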

	struct FFreeBlockList
	{
		// return true if we actually pushed it
		FORCEINLINE bool PushToFront(void* InPtr, uint32 InPoolIndex, uint32 InBlockSize)
		{
			checkSlow(InPtr);

			if ((PartialBundle.Count >= (uint32)GMallocBinned3BundleCount) | (PartialBundle.Count * InBlockSize >= (uint32)GMallocBinned3BundleSize))
			{
				if (FullBundle.Head)
				{
					return false;
				}
				FullBundle = PartialBundle;
				PartialBundle.Reset();
			}
			PartialBundle.PushHead((FBundleNode*)InPtr);
			return true;
		}
		FORCEINLINE bool CanPushToFront(uint32 InPoolIndex, uint32 InBlockSize)
		{
			return !((!!FullBundle.Head) & ((PartialBundle.Count >= (uint32)GMallocBinned3BundleCount) | (PartialBundle.Count * InBlockSize >= (uint32)GMallocBinned3BundleSize)));
		}
		FORCEINLINE void* PopFromFront(uint32 InPoolIndex)
		{
			if ((!PartialBundle.Head) & (!!FullBundle.Head))
			{
				PartialBundle = FullBundle;
				FullBundle.Reset();
			}
			return PartialBundle.Head ? PartialBundle.PopHead() : nullptr;
		}

		// tries to recycle the full bundle; if that fails, it is returned for freeing
		FBundleNode* RecyleFull(uint32 InPoolIndex);
		bool ObtainPartial(uint32 InPoolIndex);
		FBundleNode* PopBundles(uint32 InPoolIndex);
	private:
		FBundle PartialBundle;
		FBundle FullBundle;
	};
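	// Design note: each per-thread list holds at most one partial and one full bundle per
	// bin. A bundle caps out at GMallocBinned3BundleCount blocks or GMallocBinned3BundleSize
	// bytes, whichever comes first, so small bins cache many blocks and large bins few.
	// When both bundles are occupied, PushToFront() refuses, and the block (and later the
	// full bundle, via RecyleFull) falls through to the shared slow path under the lock.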

	struct FPerThreadFreeBlockLists
	{
		FORCEINLINE static FPerThreadFreeBlockLists* Get()
		{
			return FMallocBinned3::Binned3TlsSlot ? (FPerThreadFreeBlockLists*)FPlatformTLS::GetTlsValue(FMallocBinned3::Binned3TlsSlot) : nullptr;
		}
		static void SetTLS();
		static void ClearTLS();

		FPerThreadFreeBlockLists()
#if BINNED3_ALLOCATOR_STATS
			: AllocatedMemory(0)
#endif
		{ }

		FORCEINLINE void* Malloc(uint32 InPoolIndex)
		{
			return FreeLists[InPoolIndex].PopFromFront(InPoolIndex);
		}
		// return true if the pointer was pushed
		FORCEINLINE bool Free(void* InPtr, uint32 InPoolIndex, uint32 InBlockSize)
		{
			return FreeLists[InPoolIndex].PushToFront(InPtr, InPoolIndex, InBlockSize);
		}
		// return true if a pointer can be pushed
		FORCEINLINE bool CanFree(uint32 InPoolIndex, uint32 InBlockSize)
		{
			return FreeLists[InPoolIndex].CanPushToFront(InPoolIndex, InBlockSize);
		}
		// returns a bundle that needs to be freed if it can't be recycled
		FBundleNode* RecycleFullBundle(uint32 InPoolIndex)
		{
			return FreeLists[InPoolIndex].RecyleFull(InPoolIndex);
		}
		// returns true if we have anything to pop
		bool ObtainRecycledPartial(uint32 InPoolIndex)
		{
			return FreeLists[InPoolIndex].ObtainPartial(InPoolIndex);
		}
		FBundleNode* PopBundles(uint32 InPoolIndex)
		{
			return FreeLists[InPoolIndex].PopBundles(InPoolIndex);
		}
#if BINNED3_ALLOCATOR_STATS
	public:
		int64 AllocatedMemory;
		static TAtomic<int64> ConsolidatedMemory;
#endif
	private:
		FFreeBlockList FreeLists[BINNED3_SMALL_POOL_COUNT];
	};
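	// Fast-path flow: Malloc/Free below first consult this thread's FPerThreadFreeBlockLists
	// (found via Binned3TlsSlot) and touch no locks at all; only a cache miss, a full cache,
	// or a large allocation reaches the MallocExternal/FreeExternal slow paths.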

#if !BINNED3_USE_SEPARATE_VM_PER_POOL
	FORCEINLINE uint64 PoolIndexFromPtr(const void* Ptr) // returns a uint64 so it can also be used to check whether this is an OS allocation
	{
		return (UPTRINT(Ptr) - UPTRINT(Binned3BaseVMPtr)) >> MAX_MEMORY_PER_BLOCK_SIZE_SHIFT;
	}
	FORCEINLINE uint8* PoolBasePtr(uint32 InPoolIndex)
	{
		return Binned3BaseVMPtr + InPoolIndex * MAX_MEMORY_PER_BLOCK_SIZE;
	}
#else
#if BINNED3_ALLOCATOR_STATS
	void RecordPoolSearch(uint32 Tests);
#else
	FORCEINLINE void RecordPoolSearch(uint32 Tests)
	{
	}
#endif
	FORCEINLINE uint64 PoolIndexFromPtr(const void* Ptr) // returns a uint64 so it can also be used to check whether this is an OS allocation
	{
		if (PoolSearchDiv == 0)
		{
			return (UPTRINT(Ptr) - UPTRINT(PoolBaseVMPtr[0])) >> MAX_MEMORY_PER_BLOCK_SIZE_SHIFT;
		}
		uint64 PoolIndex = BINNED3_SMALL_POOL_COUNT;
		if (((uint8*)Ptr >= PoolBaseVMPtr[0]) & ((uint8*)Ptr < HighestPoolBaseVMPtr + MAX_MEMORY_PER_BLOCK_SIZE))
		{
			PoolIndex = uint64((uint8*)Ptr - PoolBaseVMPtr[0]) / PoolSearchDiv;
			if (PoolIndex >= BINNED3_SMALL_POOL_COUNT)
			{
				PoolIndex = BINNED3_SMALL_POOL_COUNT - 1;
			}
			uint32 Tests = 1; // we are counting potential cache misses here, not actual comparisons
			if ((uint8*)Ptr < PoolBaseVMPtr[PoolIndex])
			{
				do
				{
					Tests++;
					PoolIndex--;
					check(PoolIndex < BINNED3_SMALL_POOL_COUNT);
				} while ((uint8*)Ptr < PoolBaseVMPtr[PoolIndex]);
				if ((uint8*)Ptr >= PoolBaseVMPtr[PoolIndex] + MAX_MEMORY_PER_BLOCK_SIZE)
				{
					PoolIndex = BINNED3_SMALL_POOL_COUNT; // was in the gap
				}
			}
			else if ((uint8*)Ptr >= PoolBaseVMPtr[PoolIndex] + MAX_MEMORY_PER_BLOCK_SIZE)
			{
				do
				{
					Tests++;
					PoolIndex++;
					check(PoolIndex < BINNED3_SMALL_POOL_COUNT);
				} while ((uint8*)Ptr >= PoolBaseVMPtr[PoolIndex] + MAX_MEMORY_PER_BLOCK_SIZE);
				if ((uint8*)Ptr < PoolBaseVMPtr[PoolIndex])
				{
					PoolIndex = BINNED3_SMALL_POOL_COUNT; // was in the gap
				}
			}
			RecordPoolSearch(Tests);
		}
		return PoolIndex;
	}

	FORCEINLINE uint8* PoolBasePtr(uint32 InPoolIndex)
	{
		return PoolBaseVMPtr[InPoolIndex];
	}
#endif
	FORCEINLINE uint32 PoolIndexFromPtrChecked(const void* Ptr)
	{
		uint64 Result = PoolIndexFromPtr(Ptr);
		check(Result < BINNED3_SMALL_POOL_COUNT);
		return (uint32)Result;
	}

	FORCEINLINE bool IsOSAllocation(const void* Ptr)
	{
		return PoolIndexFromPtr(Ptr) >= BINNED3_SMALL_POOL_COUNT;
	}
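	// Pointer-to-pool mapping in one line: with a single contiguous reservation, the pool
	// index is just (Ptr - Base) >> MAX_MEMORY_PER_BLOCK_SIZE_SHIFT, and any result at or
	// past BINNED3_SMALL_POOL_COUNT is, by construction, an OS (large) allocation. The
	// separate-VM-per-pool variant first guesses via PoolSearchDiv, then walks neighboring
	// pool bases to correct the guess, counting the probes via RecordPoolSearch.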


	FORCEINLINE void* BlockOfBlocksPointerFromContainedPtr(const void* Ptr, uint8 PagesPlatformForBlockOfBlocks, uint32& OutBlockOfBlocksIndex)
	{
		uint32 PoolIndex = PoolIndexFromPtrChecked(Ptr);
		uint8* PoolStart = PoolBasePtr(PoolIndex);
		uint64 BlockOfBlocksIndex = (UPTRINT(Ptr) - UPTRINT(PoolStart)) / (UPTRINT(PagesPlatformForBlockOfBlocks) * UPTRINT(OsAllocationGranularity));
		OutBlockOfBlocksIndex = BlockOfBlocksIndex;

		uint8* Result = PoolStart + BlockOfBlocksIndex * UPTRINT(PagesPlatformForBlockOfBlocks) * UPTRINT(OsAllocationGranularity);

		check(Result < PoolStart + MAX_MEMORY_PER_BLOCK_SIZE);
		return Result;
	}
	FORCEINLINE uint8* BlockPointerFromIndecies(uint32 InPoolIndex, uint32 BlockOfBlocksIndex, uint32 BlockOfBlocksSize)
	{
		uint8* PoolStart = PoolBasePtr(InPoolIndex);
		uint8* Ptr = PoolStart + BlockOfBlocksIndex * uint64(BlockOfBlocksSize);
		check(Ptr + BlockOfBlocksSize <= PoolStart + MAX_MEMORY_PER_BLOCK_SIZE);
		return Ptr;
	}
	FPoolInfoSmall* PushNewPoolToFront(FPoolTable& Table, uint32 InBlockSize, uint32 InPoolIndex, uint32& OutBlockOfBlocksIndex);
	FPoolInfoSmall* GetFrontPool(FPoolTable& Table, uint32 InPoolIndex, uint32& OutBlockOfBlocksIndex);

public:


	FMallocBinned3();

	virtual ~FMallocBinned3();

	// FMalloc interface.
	virtual bool IsInternallyThreadSafe() const override;
	FORCEINLINE virtual void* Malloc(SIZE_T Size, uint32 Alignment) override
	{
		void* Result = nullptr;

		// Only allocate from the small pools if the size is small enough and the alignment isn't crazy large.
		// With large alignments, we'll waste a lot of memory allocating an entire page, but such alignments are highly unlikely in practice.
		if ((Size <= BINNED3_MAX_SMALL_POOL_SIZE) & (Alignment <= BINNED3_MINIMUM_ALIGNMENT)) // one branch, not two
		{
			FPerThreadFreeBlockLists* Lists = GMallocBinned3PerThreadCaches ? FPerThreadFreeBlockLists::Get() : nullptr;
			if (Lists)
			{
				uint32 PoolIndex = BoundSizeToPoolIndex(Size);
				uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
				Result = Lists->Malloc(PoolIndex);
#if BINNED3_ALLOCATOR_STATS
				if (Result)
				{
					SmallPoolTables[PoolIndex].HeadEndAlloc(Size);
					Lists->AllocatedMemory += BlockSize;
				}
#endif
			}
		}
		if (Result == nullptr)
		{
			Result = MallocExternal(Size, Alignment);
		}

		return Result;
	}
	FORCEINLINE virtual void* Realloc(void* Ptr, SIZE_T NewSize, uint32 Alignment) override
	{
		if (NewSize <= BINNED3_MAX_SMALL_POOL_SIZE && Alignment <= BINNED3_MINIMUM_ALIGNMENT) // one branch, not two
		{
			FPerThreadFreeBlockLists* Lists = GMallocBinned3PerThreadCaches ? FPerThreadFreeBlockLists::Get() : nullptr;

			uint64 PoolIndex = PoolIndexFromPtr(Ptr);
			if ((!!Lists) & ((!Ptr) | (PoolIndex < BINNED3_SMALL_POOL_COUNT)))
			{
				uint32 BlockSize = 0;

				bool bCanFree = true; // the nullptr is always "freeable"
				if (Ptr)
				{
					// Reallocate to a smaller/bigger pool if necessary
					BlockSize = PoolIndexToBlockSize(PoolIndex);
					if ((!!NewSize) & (NewSize <= BlockSize) & ((!PoolIndex) | (NewSize > PoolIndexToBlockSize(static_cast<uint32>(PoolIndex - 1)))))
					{
#if BINNED3_ALLOCATOR_STATS
						SmallPoolTables[PoolIndex].HeadEndAlloc(NewSize);
						SmallPoolTables[PoolIndex].HeadEndFree();
#endif
						return Ptr;
					}
					bCanFree = Lists->CanFree(PoolIndex, BlockSize);
				}
				if (bCanFree)
				{
					uint32 NewPoolIndex = BoundSizeToPoolIndex(NewSize);
					uint32 NewBlockSize = PoolIndexToBlockSize(NewPoolIndex);
					void* Result = NewSize ? Lists->Malloc(NewPoolIndex) : nullptr;
#if BINNED3_ALLOCATOR_STATS
					if (Result)
					{
						SmallPoolTables[NewPoolIndex].HeadEndAlloc(NewSize);
						Lists->AllocatedMemory += NewBlockSize;
					}
#endif
					if (Result || !NewSize)
					{
						if (Result && Ptr)
						{
							FMemory::Memcpy(Result, Ptr, FPlatformMath::Min<SIZE_T>(NewSize, BlockSize));
						}
						if (Ptr)
						{
							bool bDidPush = Lists->Free(Ptr, PoolIndex, BlockSize);
							checkSlow(bDidPush);
#if BINNED3_ALLOCATOR_STATS
							SmallPoolTables[PoolIndex].HeadEndFree();
							Lists->AllocatedMemory -= BlockSize;
#endif
						}

						return Result;
					}
				}
			}
		}
		void* Result = ReallocExternal(Ptr, NewSize, Alignment);
		return Result;
	}
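	// In-place realloc test, unpacked: the block is kept when NewSize still fits the current
	// bin (NewSize <= BlockSize) yet would not fall into a smaller bin (NewSize greater than
	// the previous bin's block size). Note the bitwise & and |: both sides always evaluate,
	// so for PoolIndex == 0 the PoolIndex - 1 lookup wraps and lands on the extra dummy
	// entry of SmallBlockSizesReversedShifted (see the comment near its declaration below).
	// Everything else funnels into ReallocExternal.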

	FORCEINLINE virtual void Free(void* Ptr) override
	{
		uint64 PoolIndex = PoolIndexFromPtr(Ptr);
		if (PoolIndex < BINNED3_SMALL_POOL_COUNT)
		{
			FPerThreadFreeBlockLists* Lists = GMallocBinned3PerThreadCaches ? FPerThreadFreeBlockLists::Get() : nullptr;
			if (Lists)
			{
				int32 BlockSize = PoolIndexToBlockSize(PoolIndex);
				if (Lists->Free(Ptr, PoolIndex, BlockSize))
				{
#if BINNED3_ALLOCATOR_STATS
					SmallPoolTables[PoolIndex].HeadEndFree();
					Lists->AllocatedMemory -= BlockSize;
#endif
					return;
				}
			}
		}
		FreeExternal(Ptr);
	}
	FORCEINLINE virtual bool GetAllocationSize(void *Ptr, SIZE_T &SizeOut) override
	{
		uint64 PoolIndex = PoolIndexFromPtr(Ptr);
		if (PoolIndex < BINNED3_SMALL_POOL_COUNT)
		{
			SizeOut = PoolIndexToBlockSize(PoolIndex);
			return true;
		}
		return GetAllocationSizeExternal(Ptr, SizeOut);
	}

	FORCEINLINE virtual SIZE_T QuantizeSize(SIZE_T Count, uint32 Alignment) override
	{
		static_assert(DEFAULT_ALIGNMENT <= BINNED3_MINIMUM_ALIGNMENT, "DEFAULT_ALIGNMENT is assumed to be zero"); // used below
		checkSlow((Alignment & (Alignment - 1)) == 0); // Check the alignment is a power of two
		SIZE_T SizeOut;
		if ((Count <= BINNED3_MAX_SMALL_POOL_SIZE) & (Alignment <= BINNED3_MINIMUM_ALIGNMENT)) // one branch, not two
		{
			SizeOut = PoolIndexToBlockSize(BoundSizeToPoolIndex(Count));
		}
		else
		{
			Alignment = FPlatformMath::Max<uint32>(Alignment, OsAllocationGranularity);
			SizeOut = Align(Count, Alignment);
		}
		check(SizeOut >= Count);
		return SizeOut;
	}
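	// Example of the quantization contract (illustrative sizes): QuantizeSize(100, 8)
	// returns the full block size of the bin that Malloc(100, 8) would draw from, so a
	// container can grow straight to the quantized size at no extra memory cost; past
	// BINNED3_MAX_SMALL_POOL_SIZE the answer is simply Count aligned up to at least the
	// OS allocation granularity.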

	virtual bool ValidateHeap() override;
	virtual void Trim(bool bTrimThreadCaches) override;
	virtual void SetupTLSCachesOnCurrentThread() override;
	virtual void ClearAndDisableTLSCachesOnCurrentThread() override;
	virtual const TCHAR* GetDescriptiveName() override;
	// End FMalloc interface.

	void FlushCurrentThreadCache();
	void* MallocExternal(SIZE_T Size, uint32 Alignment);
	void* ReallocExternal(void* Ptr, SIZE_T NewSize, uint32 Alignment);
	void FreeExternal(void *Ptr);
	bool GetAllocationSizeExternal(void* Ptr, SIZE_T& SizeOut);

#if BINNED3_ALLOCATOR_STATS
	int64 GetTotalAllocatedSmallPoolMemory() const;
#endif
	virtual void GetAllocatorStats( FGenericMemoryStats& out_Stats ) override;
	/** Dumps current allocator stats to the log. */
	virtual void DumpAllocatorStats(class FOutputDevice& Ar) override;

	// +1 enables the PoolIndexToBlockSize(~0u / -1) dummy access that helps avoid PoolIndex == 0 branching in Realloc(),
	// see ((!PoolIndex) | (NewSize > PoolIndexToBlockSize(static_cast<uint32>(PoolIndex - 1))))
	static uint16 SmallBlockSizesReversedShifted[BINNED3_SMALL_POOL_COUNT + 1]; // this is reversed to get the smallest elements on our main cache line
	static FMallocBinned3* MallocBinned3;
	static uint32 Binned3TlsSlot;
	static uint32 OsAllocationGranularity;
#if !BINNED3_USE_SEPARATE_VM_PER_POOL
	static uint8* Binned3BaseVMPtr;
	FPlatformMemory::FPlatformVirtualMemoryBlock Binned3BaseVMBlock;
#else
	static uint64 PoolSearchDiv; // if this is zero, the VM turned out to be contiguous anyway, so we use a simple subtract and shift
	static uint8* HighestPoolBaseVMPtr; // this is a duplicate of PoolBaseVMPtr[BINNED3_SMALL_POOL_COUNT - 1]
	static uint8* PoolBaseVMPtr[BINNED3_SMALL_POOL_COUNT];
	FPlatformMemory::FPlatformVirtualMemoryBlock PoolBaseVMBlock[BINNED3_SMALL_POOL_COUNT];
#endif
	// Mapping of sizes to small table indices
	static uint8 MemSizeToIndex[1 + (BINNED3_MAX_SMALL_POOL_SIZE >> BINNED3_MINIMUM_ALIGNMENT_SHIFT)];

	FORCEINLINE uint32 BoundSizeToPoolIndex(SIZE_T Size)
	{
		auto Index = ((Size + BINNED3_MINIMUM_ALIGNMENT - 1) >> BINNED3_MINIMUM_ALIGNMENT_SHIFT);
		checkSlow(Index >= 0 && Index <= (BINNED3_MAX_SMALL_POOL_SIZE >> BINNED3_MINIMUM_ALIGNMENT_SHIFT)); // and it should be in the table
		uint32 PoolIndex = uint32(MemSizeToIndex[Index]);
		checkSlow(PoolIndex >= 0 && PoolIndex < BINNED3_SMALL_POOL_COUNT);
		return PoolIndex;
	}
	FORCEINLINE uint32 PoolIndexToBlockSize(uint32 PoolIndex)
	{
		return uint32(SmallBlockSizesReversedShifted[BINNED3_SMALL_POOL_COUNT - PoolIndex - 1]) << BINNED3_MINIMUM_ALIGNMENT_SHIFT;
	}

	void Commit(uint32 InPoolIndex, void *Ptr, SIZE_T Size);
	void Decommit(uint32 InPoolIndex, void *Ptr, SIZE_T Size);

	static void* AllocateMetaDataMemory(SIZE_T Size);
};

PRAGMA_RESTORE_UNSAFE_TYPECAST_WARNINGS

#define BINNED3_INLINE (1)
#if BINNED3_INLINE // during development, it helps with iteration time to not include these here, but rather in the .cpp
	#if PLATFORM_USES_FIXED_GMalloc_CLASS && !FORCE_ANSI_ALLOCATOR && USE_MALLOC_BINNED3
		#define FMEMORY_INLINE_FUNCTION_DECORATOR FORCEINLINE
		#define FMEMORY_INLINE_GMalloc (FMallocBinned3::MallocBinned3)
		#include "FMemory.inl" // IWYU pragma: export
	#endif
#endif

#endif // PLATFORM_HAS_FPlatformVirtualMemoryBlock
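
In the engine this allocator is selected by the platform memory layer and reached through GMalloc / FMemory rather than instantiated by hand. As a rough orientation, here is a minimal usage sketch of the public FMalloc surface declared above (illustrative only: CreateBinned3Allocator and ExerciseFastPaths are hypothetical helpers, and real engine code carves the FMalloc object out of raw OS memory instead of using new):

	#include "HAL/MallocBinned3.h"

	FMalloc* CreateBinned3Allocator()
	{
		// Simplification for illustration; the engine does not use plain 'new' here.
		return new FMallocBinned3();
	}

	void ExerciseFastPaths(FMalloc& Malloc)
	{
		Malloc.SetupTLSCachesOnCurrentThread();      // enable this thread's bundle cache

		void* P = Malloc.Malloc(100, 16);            // small enough: served from a binned pool
		SIZE_T Quantized = Malloc.QuantizeSize(100, 16);
		P = Malloc.Realloc(P, Quantized, 16);        // still fits the same bin: typically returns P unchanged

		SIZE_T OutSize = 0;
		Malloc.GetAllocationSize(P, OutSize);        // reports the bin's block size, not 100

		Malloc.Free(P);                              // pushed onto this thread's free bundle when possible
		Malloc.ClearAndDisableTLSCachesOnCurrentThread();
	}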