Ark Server API (ASA) - Wiki
MallocBinnedGPU.h
// Copyright Epic Games, Inc. All Rights Reserved.

#pragma once

#include "Containers/Array.h"
#include "CoreTypes.h"
#include "HAL/PlatformAtomics.h"
#include "HAL/PlatformMemory.h"
#include "Math/UnrealMathUtility.h"
#include "Templates/Atomic.h"
#include "Templates/MemoryOps.h"

#if PLATFORM_HAS_FPlatformVirtualMemoryBlock
#include "HAL/Allocators/CachedOSPageAllocator.h"
#include "HAL/Allocators/PooledVirtualMemoryAllocator.h"
#include "HAL/CriticalSection.h"
#include "HAL/LowLevelMemTracker.h"
#include "HAL/MallocBinnedCommon.h"
#include "HAL/MemoryBase.h"
#include "HAL/PlatformMath.h"
#include "HAL/PlatformTLS.h"
#include "HAL/UnrealMemory.h"
#include "Math/NumericLimits.h"
#include "Misc/AssertionMacros.h"
#include "Misc/ScopeLock.h"
#include "Templates/AlignmentTemplates.h"


#define BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle (8)

#define COLLECT_BINNEDGPU_STATS (!UE_BUILD_SHIPPING)

#if COLLECT_BINNEDGPU_STATS
    #define MBG_STAT(x) x
#else
    #define MBG_STAT(x)
#endif
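
// Note (editorial): MBG_STAT wraps stat bookkeeping so it compiles away in
// shipping builds. For example, the call below in FFreeBlockList::PushToFront,
//   MBG_STAT(Allocator.GPUProxyMemory += sizeof(FGPUMemoryBlockProxy);)
// expands to the enclosed statement when COLLECT_BINNEDGPU_STATS is 1, and to
// nothing otherwise.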

PRAGMA_DISABLE_UNSAFE_TYPECAST_WARNINGS

class FMallocBinnedGPU final : public FMalloc
{
    struct FGlobalRecycler;
    struct FPoolInfoLarge;
    struct FPoolInfoSmall;
    struct FPoolTable;
    struct PoolHashBucket;
    struct Private;


    struct FGPUMemoryBlockProxy
    {
        uint8 MemoryModifiedByCPU[32 - sizeof(void*)]; // might be modified for free list links, etc.
        void *GPUMemory; // pointer to the actual GPU memory, which we cannot modify with the CPU

        FGPUMemoryBlockProxy(void *InGPUMemory)
            : GPUMemory(InGPUMemory)
        {
            check(GPUMemory);
        }
    };
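
    // Layout note (an editorial gloss, not from the original source): the
    // allocator never dereferences GPU memory on the CPU, so every cached free
    // block is tracked through one of these CPU-side proxies. On a 64-bit
    // build the struct is exactly 32 bytes: 24 bytes of CPU-writable scratch
    // (large enough to be reused as an FBundleNode, defined below) followed by
    // the 8-byte pointer to the GPU block it stands in for.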

    struct FFreeBlock
    {
        enum
        {
            CANARY_VALUE = 0xc3
        };

        FORCEINLINE FFreeBlock(uint32 InPageSize, uint32 InBlockSize, uint32 InPoolIndex, uint8 MinimumAlignmentShift)
            : BlockSizeShifted(InBlockSize >> MinimumAlignmentShift)
            , PoolIndex(InPoolIndex)
            , Canary(CANARY_VALUE)
            , NextFreeBlock(nullptr)
        {
            check(InPoolIndex < MAX_uint8 && (InBlockSize >> MinimumAlignmentShift) <= MAX_uint16);
            NumFreeBlocks = InPageSize / InBlockSize;
        }

        FORCEINLINE uint32 GetNumFreeRegularBlocks() const
        {
            return NumFreeBlocks;
        }
        FORCEINLINE bool IsCanaryOk() const
        {
            return Canary == FFreeBlock::CANARY_VALUE;
        }

        FORCEINLINE void CanaryTest() const
        {
            if (!IsCanaryOk())
            {
                CanaryFail();
            }
        }
        void CanaryFail() const;

        FORCEINLINE void* AllocateRegularBlock(uint8 MinimumAlignmentShift)
        {
            --NumFreeBlocks;
            return (uint8*)(((FGPUMemoryBlockProxy*)this)->GPUMemory) + NumFreeBlocks * (uint32(BlockSizeShifted) << MinimumAlignmentShift);
        }

        uint16 BlockSizeShifted;   // Size of the blocks that this list points to >> ArenaParams.MinimumAlignmentShift
        uint8 PoolIndex;           // Index of this pool
        uint8 Canary;              // Constant value of 0xc3
        uint32 NumFreeBlocks;      // Number of consecutive free blocks here, at least 1.
        FFreeBlock* NextFreeBlock; // Next free block or nullptr
    };
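
    // Worked example (illustrative numbers, not values mandated by this
    // file): with InPageSize = 65536, InBlockSize = 1024 and
    // MinimumAlignmentShift = 4, the constructor stores BlockSizeShifted = 64
    // and NumFreeBlocks = 64. Each AllocateRegularBlock() call then
    // decrements NumFreeBlocks and returns the highest remaining block:
    // GPUMemory + 63 * 1024, then GPUMemory + 62 * 1024, and so on, carving
    // the page from the top down.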

    struct FPoolTable
    {
        uint32 BlockSize;
        uint16 BlocksPerBlockOfBlocks;
        uint8 PagesPlatformForBlockOfBlocks;

        FBitTree BlockOfBlockAllocationBits; // a set bit here means the virtual memory is committed
        FBitTree BlockOfBlockIsExhausted;    // a set bit here means the pool is completely full

        uint32 NumEverUsedBlockOfBlocks;
        FPoolInfoSmall** PoolInfos;

        uint64 UnusedAreaOffsetLow;
    };

    struct FPtrToPoolMapping
    {
        FPtrToPoolMapping()
            : PtrToPoolPageBitShift(0)
            , HashKeyShift(0)
            , PoolMask(0)
            , MaxHashBuckets(0)
        {
        }
        explicit FPtrToPoolMapping(uint32 InPageSize, uint64 InNumPoolsPerPage, uint64 AddressLimit)
        {
            Init(InPageSize, InNumPoolsPerPage, AddressLimit);
        }

        void Init(uint32 InPageSize, uint64 InNumPoolsPerPage, uint64 AddressLimit)
        {
            uint64 PoolPageToPoolBitShift = FPlatformMath::CeilLogTwo(InNumPoolsPerPage);

            PtrToPoolPageBitShift = FPlatformMath::CeilLogTwo(InPageSize);
            HashKeyShift = PtrToPoolPageBitShift + PoolPageToPoolBitShift;
            PoolMask = (1ull << PoolPageToPoolBitShift) - 1;
            MaxHashBuckets = AddressLimit >> HashKeyShift;
        }

        FORCEINLINE void GetHashBucketAndPoolIndices(const void* InPtr, uint32& OutBucketIndex, UPTRINT& OutBucketCollision, uint32& OutPoolIndex) const
        {
            OutBucketCollision = (UPTRINT)InPtr >> HashKeyShift;
            OutBucketIndex = uint32(OutBucketCollision & (MaxHashBuckets - 1));
            OutPoolIndex = ((UPTRINT)InPtr >> PtrToPoolPageBitShift) & PoolMask;
        }

        FORCEINLINE uint64 GetMaxHashBuckets() const
        {
            return MaxHashBuckets;
        }

    private:
        /** Shift to apply to a pointer to get the reference from the indirect tables */
        uint64 PtrToPoolPageBitShift;
        /** Shift required to compute the hash table key. */
        uint64 HashKeyShift;
        /** Used to mask off the bits that have been used to look up the indirect table */
        uint64 PoolMask;
        // PageSize dependent constants
        uint64 MaxHashBuckets;
    };
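
    // Worked example (illustrative parameters, not values taken from this
    // file): Init(65536, 4, 1ull << 38) gives
    //   PoolPageToPoolBitShift = CeilLogTwo(4)     = 2
    //   PtrToPoolPageBitShift  = CeilLogTwo(65536) = 16
    //   HashKeyShift           = 16 + 2            = 18
    //   PoolMask               = (1 << 2) - 1      = 3
    //   MaxHashBuckets         = 2^38 >> 18        = 2^20
    // GetHashBucketAndPoolIndices() then splits a pointer into a hash bucket
    // (the bits above HashKeyShift) and a pool-within-page index (the two
    // bits just above the 16-bit page offset).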

    struct FBundleNode
    {
        FBundleNode* NextNodeInCurrentBundle;
        union
        {
            FBundleNode* NextBundle;
            int32 Count;
        };
    };

    struct FBundle
    {
        FORCEINLINE FBundle()
        {
            Reset();
        }

        FORCEINLINE void Reset()
        {
            Head = nullptr;
            Count = 0;
        }

        FORCEINLINE void PushHead(FBundleNode* Node)
        {
            Node->NextNodeInCurrentBundle = Head;
            Node->NextBundle = nullptr;
            Head = Node;
            Count++;
        }

        FORCEINLINE FBundleNode* PopHead()
        {
            FBundleNode* Result = Head;

            Count--;
            Head = Head->NextNodeInCurrentBundle;
            return Result;
        }

        FBundleNode* Head;
        uint32 Count;
    };
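
    // Usage note (editorial): FBundle is a simple LIFO list of FBundleNodes
    // threaded through the proxies' CPU-writable scratch space. PopHead()
    // assumes a non-empty bundle, so callers such as
    // FFreeBlockList::PopFromFront below test Head before popping.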

    struct FFreeBlockList
    {
        // return true if we actually pushed it
        FORCEINLINE bool PushToFront(FMallocBinnedGPU& Allocator, void* InPtr, uint32 InPoolIndex, uint32 InBlockSize, const FArenaParams& LocalArenaParams)
        {
            check(InPtr);

            if ((PartialBundle.Count >= (uint32)LocalArenaParams.MaxBlocksPerBundle) | (PartialBundle.Count * InBlockSize >= (uint32)LocalArenaParams.MaxSizePerBundle))
            {
                if (FullBundle.Head)
                {
                    return false;
                }
                FullBundle = PartialBundle;
                PartialBundle.Reset();
            }
            PartialBundle.PushHead((FBundleNode*)new FGPUMemoryBlockProxy(InPtr));
            MBG_STAT(Allocator.GPUProxyMemory += sizeof(FGPUMemoryBlockProxy);)
            return true;
        }
        FORCEINLINE bool CanPushToFront(uint32 InPoolIndex, uint32 InBlockSize, const FArenaParams& LocalArenaParams)
        {
            return !((!!FullBundle.Head) & ((PartialBundle.Count >= (uint32)LocalArenaParams.MaxBlocksPerBundle) | (PartialBundle.Count * InBlockSize >= (uint32)LocalArenaParams.MaxSizePerBundle)));
        }
        FORCEINLINE void* PopFromFront(FMallocBinnedGPU& Allocator, uint32 InPoolIndex)
        {
            if ((!PartialBundle.Head) & (!!FullBundle.Head))
            {
                PartialBundle = FullBundle;
                FullBundle.Reset();
            }
            void *Result = nullptr;
            if (PartialBundle.Head)
            {
                FGPUMemoryBlockProxy* Proxy = (FGPUMemoryBlockProxy*)PartialBundle.PopHead();
                Result = Proxy->GPUMemory;
                check(Result);
                delete Proxy;
                MBG_STAT(Allocator.GPUProxyMemory -= sizeof(FGPUMemoryBlockProxy);)
            }
            return Result;
        }

        // tries to recycle the full bundle; if that fails, it is returned for freeing
        FBundleNode* RecyleFull(FArenaParams& LocalArenaParams, FGlobalRecycler& GGlobalRecycler, uint32 InPoolIndex);
        bool ObtainPartial(FArenaParams& LocalArenaParams, FGlobalRecycler& GGlobalRecycler, uint32 InPoolIndex);
        FBundleNode* PopBundles(uint32 InPoolIndex);
    private:
        FBundle PartialBundle;
        FBundle FullBundle;
    };
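
    // Flow note (editorial): frees accumulate in PartialBundle until it hits
    // the per-bundle block-count or byte cap, at which point it is promoted
    // to FullBundle and a fresh partial bundle is started. If a second full
    // bundle would be needed, PushToFront returns false and the caller must
    // route the pointer through FreeExternal instead.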

    struct FPerThreadFreeBlockLists
    {
        FORCEINLINE static FPerThreadFreeBlockLists* Get(uint32 BinnedGPUTlsSlot)
        {
            return BinnedGPUTlsSlot ? (FPerThreadFreeBlockLists*)FPlatformTLS::GetTlsValue(BinnedGPUTlsSlot) : nullptr;
        }
        static void SetTLS(FMallocBinnedGPU& Allocator);
        static int64 ClearTLS(FMallocBinnedGPU& Allocator);

        FPerThreadFreeBlockLists(uint32 PoolCount)
            : AllocatedMemory(0)
        {
            FreeLists.AddDefaulted(PoolCount);
        }

        FORCEINLINE void* Malloc(FMallocBinnedGPU& Allocator, uint32 InPoolIndex)
        {
            return FreeLists[InPoolIndex].PopFromFront(Allocator, InPoolIndex);
        }
        // return true if the pointer was pushed
        FORCEINLINE bool Free(FMallocBinnedGPU& Allocator, void* InPtr, uint32 InPoolIndex, uint32 InBlockSize, const FArenaParams& LocalArenaParams)
        {
            return FreeLists[InPoolIndex].PushToFront(Allocator, InPtr, InPoolIndex, InBlockSize, LocalArenaParams);
        }
        // return true if a pointer can be pushed
        FORCEINLINE bool CanFree(uint32 InPoolIndex, uint32 InBlockSize, const FArenaParams& LocalArenaParams)
        {
            return FreeLists[InPoolIndex].CanPushToFront(InPoolIndex, InBlockSize, LocalArenaParams);
        }
        // returns a bundle that needs to be freed if it can't be recycled
        FBundleNode* RecycleFullBundle(FArenaParams& LocalArenaParams, FGlobalRecycler& GlobalRecycler, uint32 InPoolIndex)
        {
            return FreeLists[InPoolIndex].RecyleFull(LocalArenaParams, GlobalRecycler, InPoolIndex);
        }
        // returns true if we have anything to pop
        bool ObtainRecycledPartial(FArenaParams& LocalArenaParams, FGlobalRecycler& GlobalRecycler, uint32 InPoolIndex)
        {
            return FreeLists[InPoolIndex].ObtainPartial(LocalArenaParams, GlobalRecycler, InPoolIndex);
        }
        FBundleNode* PopBundles(uint32 InPoolIndex)
        {
            return FreeLists[InPoolIndex].PopBundles(InPoolIndex);
        }
        int64 AllocatedMemory;
        TArray<FFreeBlockList> FreeLists;
    };

    struct FGlobalRecycler
    {
        void Init(uint32 PoolCount)
        {
            Bundles.AddDefaulted(PoolCount);
        }
        bool PushBundle(uint32 NumCachedBundles, uint32 InPoolIndex, FBundleNode* InBundle)
        {
            for (uint32 Slot = 0; Slot < NumCachedBundles && Slot < BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle; Slot++)
            {
                if (!Bundles[InPoolIndex].FreeBundles[Slot])
                {
                    if (!FPlatformAtomics::InterlockedCompareExchangePointer((void**)&Bundles[InPoolIndex].FreeBundles[Slot], InBundle, nullptr))
                    {
                        return true;
                    }
                }
            }
            return false;
        }

        FBundleNode* PopBundle(uint32 NumCachedBundles, uint32 InPoolIndex)
        {
            for (uint32 Slot = 0; Slot < NumCachedBundles && Slot < BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle; Slot++)
            {
                FBundleNode* Result = Bundles[InPoolIndex].FreeBundles[Slot];
                if (Result)
                {
                    if (FPlatformAtomics::InterlockedCompareExchangePointer((void**)&Bundles[InPoolIndex].FreeBundles[Slot], nullptr, Result) == Result)
                    {
                        return Result;
                    }
                }
            }
            return nullptr;
        }

    private:
        struct FPaddedBundlePointer
        {
            FBundleNode* FreeBundles[BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle];
            FPaddedBundlePointer()
            {
                DefaultConstructItems<FBundleNode*>(FreeBundles, BINNEDGPU_MAX_GMallocBinnedGPUMaxBundlesBeforeRecycle);
            }
        };
        TArray<FPaddedBundlePointer> Bundles;
    };
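
    // Concurrency note (editorial): both operations are lock-free. PushBundle
    // claims the first empty slot for the pool by compare-exchanging nullptr
    // to InBundle; PopBundle is the mirror image, swapping a non-null slot
    // back to nullptr. A lost CAS race simply falls through to the next slot,
    // and a false return from PushBundle tells the caller to free the
    // bundle's blocks itself.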

    FORCEINLINE uint64 PoolIndexFromPtr(const void* Ptr)
    {
        if (PoolSearchDiv == 0)
        {
            return (UPTRINT(Ptr) - UPTRINT(PoolBaseVMPtr[0])) >> ArenaParams.MaxMemoryPerBlockSizeShift;
        }
        uint64 PoolIndex = ArenaParams.PoolCount;
        if (((uint8*)Ptr >= PoolBaseVMPtr[0]) & ((uint8*)Ptr < HighestPoolBaseVMPtr + ArenaParams.MaxMemoryPerBlockSize))
        {
            PoolIndex = uint64((uint8*)Ptr - PoolBaseVMPtr[0]) / PoolSearchDiv;
            if (PoolIndex >= ArenaParams.PoolCount)
            {
                PoolIndex = ArenaParams.PoolCount - 1;
            }
            if ((uint8*)Ptr < PoolBaseVMPtr[(int32)PoolIndex])
            {
                do
                {
                    PoolIndex--;
                    check(PoolIndex < ArenaParams.PoolCount);
                } while ((uint8*)Ptr < PoolBaseVMPtr[(int32)PoolIndex]);
                if ((uint8*)Ptr >= PoolBaseVMPtr[(int32)PoolIndex] + ArenaParams.MaxMemoryPerBlockSize)
                {
                    PoolIndex = ArenaParams.PoolCount; // was in the gap
                }
            }
            else if ((uint8*)Ptr >= PoolBaseVMPtr[(int32)PoolIndex] + ArenaParams.MaxMemoryPerBlockSize)
            {
                do
                {
                    PoolIndex++;
                    check(PoolIndex < ArenaParams.PoolCount);
                } while ((uint8*)Ptr >= PoolBaseVMPtr[(int32)PoolIndex] + ArenaParams.MaxMemoryPerBlockSize);
                if ((uint8*)Ptr < PoolBaseVMPtr[(int32)PoolIndex])
                {
                    PoolIndex = ArenaParams.PoolCount; // was in the gap
                }
            }
        }
        return PoolIndex;
    }
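
    // Lookup note (editorial): when the per-pool VM reservations happen to be
    // contiguous, PoolSearchDiv is zero and the pool index is a single
    // subtract-and-shift. Otherwise PoolSearchDiv holds the average spacing
    // between pool bases: dividing by it gives a first guess, the two scan
    // loops above nudge that guess down or up, and a pointer that falls in a
    // gap between reservations maps to ArenaParams.PoolCount, which
    // IsOSAllocation below treats as "not one of ours".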

    FORCEINLINE uint8* PoolBasePtr(uint32 InPoolIndex)
    {
        return PoolBaseVMPtr[InPoolIndex];
    }
    FORCEINLINE uint64 PoolIndexFromPtrChecked(const void* Ptr)
    {
        uint64 Result = PoolIndexFromPtr(Ptr);
        check(Result < ArenaParams.PoolCount);
        return Result;
    }

    FORCEINLINE bool IsOSAllocation(const void* Ptr)
    {
        return PoolIndexFromPtr(Ptr) >= ArenaParams.PoolCount;
    }


    FORCEINLINE void* BlockOfBlocksPointerFromContainedPtr(const void* Ptr, uint8 PagesPlatformForBlockOfBlocks, uint32& OutBlockOfBlocksIndex)
    {
        uint32 PoolIndex = PoolIndexFromPtrChecked(Ptr);
        uint8* PoolStart = PoolBasePtr(PoolIndex);
        uint64 BlockOfBlocksIndex = (UPTRINT(Ptr) - UPTRINT(PoolStart)) / (UPTRINT(PagesPlatformForBlockOfBlocks) * UPTRINT(ArenaParams.AllocationGranularity));
        OutBlockOfBlocksIndex = BlockOfBlocksIndex;

        uint8* Result = PoolStart + BlockOfBlocksIndex * UPTRINT(PagesPlatformForBlockOfBlocks) * UPTRINT(ArenaParams.AllocationGranularity);

        check(Result < PoolStart + ArenaParams.MaxMemoryPerBlockSize);
        return Result;
    }
    FORCEINLINE uint8* BlockPointerFromIndecies(uint32 InPoolIndex, uint32 BlockOfBlocksIndex, uint32 BlockOfBlocksSize)
    {
        uint8* PoolStart = PoolBasePtr(InPoolIndex);
        uint8* Ptr = PoolStart + BlockOfBlocksIndex * uint64(BlockOfBlocksSize);
        check(Ptr + BlockOfBlocksSize <= PoolStart + ArenaParams.MaxMemoryPerBlockSize);
        return Ptr;
    }
    FPoolInfoSmall* PushNewPoolToFront(FMallocBinnedGPU& Allocator, uint32 InBlockSize, uint32 InPoolIndex, uint32& OutBlockOfBlocksIndex);
    FPoolInfoSmall* GetFrontPool(FPoolTable& Table, uint32 InPoolIndex, uint32& OutBlockOfBlocksIndex);

    FORCEINLINE bool AdjustSmallBlockSizeForAlignment(SIZE_T& InOutSize, uint32 Alignment)
    {
        if ((InOutSize <= ArenaParams.MaxPoolSize) & (Alignment <= ArenaParams.MinimumAlignment)) // one branch, not two
        {
            return true;
        }
        SIZE_T AlignedSize = Align(InOutSize, Alignment);
        if (ArenaParams.bAttemptToAlignSmallBocks & (AlignedSize <= ArenaParams.MaxPoolSize) & (Alignment <= ArenaParams.MaximumAlignmentForSmallBlock)) // one branch, not three
        {
            uint32 PoolIndex = BoundSizeToPoolIndex(AlignedSize);
            while (true)
            {
                uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
                if (IsAligned(BlockSize, Alignment))
                {
                    InOutSize = SIZE_T(BlockSize);
                    return true;
                }
                PoolIndex++;
                check(PoolIndex < ArenaParams.PoolCount);
            }
        }
        return false;
    }
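
    // Worked example (illustrative bin sizes, not the real table): with bins
    // of 48, 80 and 96 bytes, a request for 33 bytes at 32-byte alignment is
    // first aligned up to 64. If BoundSizeToPoolIndex(64) lands on the
    // 80-byte bin, 80 is not a multiple of 32, so the loop advances to the
    // 96-byte bin, which is, and InOutSize is rewritten to 96. Requests that
    // cannot be satisfied this way return false and take the OS path.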

public:


    FMallocBinnedGPU();
    FArenaParams& GetParams()
    {
        return ArenaParams;
    }
    void InitMallocBinned();

    virtual ~FMallocBinnedGPU();


    // FMalloc interface.
    virtual bool IsInternallyThreadSafe() const override;
    FORCEINLINE virtual void* Malloc(SIZE_T Size, uint32 Alignment) override
    {
        Alignment = FMath::Max<uint32>(Alignment, ArenaParams.MinimumAlignment);

        void* Result = nullptr;

        // Only allocate from the small pools if the size is small enough and the alignment isn't crazy large.
        // With large alignments, we'll waste a lot of memory allocating an entire page, but such alignments are highly unlikely in practice.
        if (AdjustSmallBlockSizeForAlignment(Size, Alignment))
        {
            FPerThreadFreeBlockLists* Lists = ArenaParams.bPerThreadCaches ? FPerThreadFreeBlockLists::Get(BinnedGPUTlsSlot) : nullptr;
            if (Lists)
            {
                uint32 PoolIndex = BoundSizeToPoolIndex(Size);
                uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
                Result = Lists->Malloc(*this, PoolIndex);
                if (Result)
                {
                    Lists->AllocatedMemory += BlockSize;
                    checkSlow(IsAligned(Result, Alignment));
                }
            }
        }
        if (Result == nullptr)
        {
            Result = MallocExternal(Size, Alignment);
        }

        return Result;
    }
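
    // Fast-path note (editorial): a small allocation normally never takes a
    // lock. The size is bounded to a bin, the calling thread's TLS cache is
    // popped, and the block size is charged to Lists->AllocatedMemory; only a
    // cache miss (or a large or over-aligned request) falls through to
    // MallocExternal, which does the locked, slower work.
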
    FORCEINLINE virtual void* Realloc(void* Ptr, SIZE_T NewSize, uint32 Alignment) override
    {
        check(!"MallocBinnedGPU cannot realloc memory because the memory is assumed to not be writable by the CPU");
        return nullptr;
    }

    FORCEINLINE virtual void Free(void* Ptr) override
    {
        uint64 PoolIndex = PoolIndexFromPtr(Ptr);
        if (PoolIndex < ArenaParams.PoolCount)
        {
            FPerThreadFreeBlockLists* Lists = ArenaParams.bPerThreadCaches ? FPerThreadFreeBlockLists::Get(BinnedGPUTlsSlot) : nullptr;
            if (Lists)
            {
                int32 BlockSize = PoolIndexToBlockSize(PoolIndex);
                if (Lists->Free(*this, Ptr, PoolIndex, BlockSize, ArenaParams))
                {
                    Lists->AllocatedMemory -= BlockSize;
                    return;
                }
            }
        }
        FreeExternal(Ptr);
    }
    FORCEINLINE virtual bool GetAllocationSize(void *Ptr, SIZE_T &SizeOut) override
    {
        uint64 PoolIndex = PoolIndexFromPtr(Ptr);
        if (PoolIndex < ArenaParams.PoolCount)
        {
            SizeOut = PoolIndexToBlockSize(PoolIndex);
            return true;
        }
        return GetAllocationSizeExternal(Ptr, SizeOut);
    }

    FORCEINLINE virtual SIZE_T QuantizeSize(SIZE_T Count, uint32 Alignment) override
    {
        check(DEFAULT_ALIGNMENT <= ArenaParams.MinimumAlignment); // used below
        checkSlow((Alignment & (Alignment - 1)) == 0); // Check the alignment is a power of two
        SIZE_T SizeOut;
        if ((Count <= ArenaParams.MaxPoolSize) & (Alignment <= ArenaParams.MinimumAlignment)) // one branch, not two
        {
            SizeOut = PoolIndexToBlockSize(BoundSizeToPoolIndex(Count));
        }
        else
        {
            Alignment = FPlatformMath::Max<uint32>(Alignment, ArenaParams.AllocationGranularity);
            SizeOut = Align(Count, Alignment);
        }
        check(SizeOut >= Count);
        return SizeOut;
    }
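
    // Worked example (illustrative, assuming a 64 KiB AllocationGranularity
    // and a request that exceeds the small-pool range): QuantizeSize(100, 8)
    // returns the block size of the bin that 100 maps to, i.e. how much the
    // allocator would really hand back, while QuantizeSize(100000, 8) raises
    // the alignment to the 64 KiB granularity and returns
    // Align(100000, 65536) = 131072.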

    virtual bool ValidateHeap() override;
    virtual void Trim(bool bTrimThreadCaches) override;
    virtual void SetupTLSCachesOnCurrentThread() override;
    virtual void ClearAndDisableTLSCachesOnCurrentThread() override;
    virtual const TCHAR* GetDescriptiveName() override;
    // End FMalloc interface.

    void FlushCurrentThreadCache();
    void* MallocExternal(SIZE_T Size, uint32 Alignment);
    void FreeExternal(void *Ptr);
    bool GetAllocationSizeExternal(void* Ptr, SIZE_T& SizeOut);

    MBG_STAT(int64 GetTotalAllocatedSmallPoolMemory();)
    virtual void GetAllocatorStats(FGenericMemoryStats& out_Stats) override;
    /** Dumps current allocator stats to the log. */
    virtual void DumpAllocatorStats(class FOutputDevice& Ar) override;

    FORCEINLINE uint32 BoundSizeToPoolIndex(SIZE_T Size)
    {
        auto Index = ((Size + ArenaParams.MinimumAlignment - 1) >> ArenaParams.MinimumAlignmentShift);
        checkSlow(Index >= 0 && Index <= (ArenaParams.MaxPoolSize >> ArenaParams.MinimumAlignmentShift)); // and it should be in the table
        uint32 PoolIndex = uint32(MemSizeToIndex[Index]);
        checkSlow(PoolIndex >= 0 && PoolIndex < ArenaParams.PoolCount);
        return PoolIndex;
    }
    FORCEINLINE uint32 PoolIndexToBlockSize(uint32 PoolIndex)
    {
        return uint32(SmallBlockSizesReversedShifted[ArenaParams.PoolCount - PoolIndex - 1]) << ArenaParams.MinimumAlignmentShift;
    }
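
    // Table note (editorial): SmallBlockSizesReversedShifted is stored
    // largest-first so the smallest, hottest bins land on the allocator's
    // main cache line; PoolIndexToBlockSize undoes the reversal via
    // [PoolCount - PoolIndex - 1]. With MinimumAlignmentShift = 4 (an
    // illustrative value), a stored entry of 2 decodes to a 32-byte block
    // (2 << 4).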

    void Commit(uint32 InPoolIndex, void *Ptr, SIZE_T Size);
    void Decommit(uint32 InPoolIndex, void *Ptr, SIZE_T Size);


    // Pool tables for different pool sizes
    TArray<FPoolTable> SmallPoolTables;

    uint32 SmallPoolInfosPerPlatformPage;

    PoolHashBucket* HashBuckets;
    PoolHashBucket* HashBucketFreeList;
    uint64 NumLargePoolsPerPage;

    FCriticalSection Mutex;
    FGlobalRecycler GGlobalRecycler;
    FPtrToPoolMapping PtrToPoolMapping;

    FArenaParams ArenaParams;

    TArray<uint16> SmallBlockSizesReversedShifted; // this is reversed to get the smallest elements on our main cache line
    uint32 BinnedGPUTlsSlot;
    uint64 PoolSearchDiv; // if this is zero, the VM turned out to be contiguous anyway so we use a simple subtract and shift
    uint8* HighestPoolBaseVMPtr; // this is a duplicate of PoolBaseVMPtr[ArenaParams.PoolCount - 1]
    FPlatformMemory::FPlatformVirtualMemoryBlock PoolBaseVMBlock;
    TArray<uint8*> PoolBaseVMPtr;
    TArray<FPlatformMemory::FPlatformVirtualMemoryBlock> PoolBaseVMBlocks;
    // Mapping of sizes to small table indices
    TArray<uint8> MemSizeToIndex;

    MBG_STAT(
        int64 BinnedGPUAllocatedSmallPoolMemory = 0; // memory that's requested to be allocated by the game
        int64 BinnedGPUAllocatedOSSmallPoolMemory = 0;

        int64 BinnedGPUAllocatedLargePoolMemory = 0; // memory requests to the OS which don't fit in the small pool
        int64 BinnedGPUAllocatedLargePoolMemoryWAlignment = 0; // when we allocate at the OS level we need to align to a size

        int64 BinnedGPUPoolInfoMemory = 0;
        int64 BinnedGPUHashMemory = 0;
        int64 BinnedGPUFreeBitsMemory = 0;
        int64 BinnedGPUTLSMemory = 0;
        TAtomic<int64> ConsolidatedMemory;
        TAtomic<int64> GPUProxyMemory;
    )

    FCriticalSection FreeBlockListsRegistrationMutex;
    FCriticalSection& GetFreeBlockListsRegistrationMutex()
    {
        return FreeBlockListsRegistrationMutex;
    }
    TArray<FPerThreadFreeBlockLists*> RegisteredFreeBlockLists;
    TArray<FPerThreadFreeBlockLists*>& GetRegisteredFreeBlockLists()
    {
        return RegisteredFreeBlockLists;
    }
    void RegisterThreadFreeBlockLists(FPerThreadFreeBlockLists* FreeBlockLists)
    {
        FScopeLock Lock(&GetFreeBlockListsRegistrationMutex());
        GetRegisteredFreeBlockLists().Add(FreeBlockLists);
    }
    int64 UnregisterThreadFreeBlockLists(FPerThreadFreeBlockLists* FreeBlockLists)
    {
        FScopeLock Lock(&GetFreeBlockListsRegistrationMutex());
        GetRegisteredFreeBlockLists().Remove(FreeBlockLists);
        return FreeBlockLists->AllocatedMemory;
    }

    TArray<void*> MallocedPointers;
};

PRAGMA_RESTORE_UNSAFE_TYPECAST_WARNINGS

#endif // PLATFORM_HAS_FPlatformVirtualMemoryBlock