Ark Server API (ASA) - Wiki
Loading...
Searching...
No Matches
Float16.h
Go to the documentation of this file.
1// Copyright Epic Games, Inc. All Rights Reserved.
2
3#pragma once
4
5#include "CoreTypes.h"
6#include "Serialization/Archive.h"
7#include "Math/UnrealMathUtility.h"
8#include "Math/Float32.h"
9#include "Serialization/MemoryLayout.h"
10
11template <typename T> struct TCanBulkSerialize;
12
13/**
14* 16 bit float components and conversion
15*
16*
17* IEEE float 16
18* Represented by 10-bit mantissa M, 5-bit exponent E, and 1-bit sign S
19*
20* Specials:
21*
22* E=0, M=0 == 0.0
23* E=0, M!=0 == Denormalized value (M / 2^10) * 2^-14
24* 0<E<31, M=any == (1 + M / 2^10) * 2^(E-15)
25* E=31, M=0 == Infinity
26* E=31, M!=0 == NAN
27*
28* conversion from 32 bit float is with RTNE (round to nearest even)
29*
30* Legacy code truncated in the conversion. SetTruncate can be used for backwards compatibility.
31*
32*/
34{
35public:
36
37 /* Float16 can store values in [-MaxF16Float,MaxF16Float] */
38 constexpr static float MaxF16Float = 65504.f;
39
40 uint16 Encoded;
41
42 /** Default constructor */
43 FFloat16();
44
45 /** Copy constructor. */
46 FFloat16(const FFloat16& FP16Value);
47
48 /** Conversion constructor. Convert from Fp32 to Fp16. */
49 FFloat16(float FP32Value);
50
51 /** Assignment operator. Convert from Fp32 to Fp16. */
52 FFloat16& operator=(float FP32Value);
53
54 /** Assignment operator. Copy Fp16 value. */
55 FFloat16& operator=(const FFloat16& FP16Value);
56
57 /** Convert from Fp16 to Fp32. */
58 operator float() const;
59
60 /** Convert from Fp32 to Fp16, round-to-nearest-even. (RTNE)
61 Stores values out of range as +-Inf */
62 void Set(float FP32Value);
63
64 /*Convert from Fp32 to Fp16, round-to-nearest-even. (RTNE)
65 Clamps values out of range as +-MaxF16Float */
66 void SetClamped(float FP32Value)
67 {
69 }
70
71 /** Convert from Fp32 to Fp16, truncating low bits.
72 (backward-compatible conversion; was used by Set() previously)
73 Clamps values out of range to [-MaxF16Float,MaxF16Float] */
74 void SetTruncate(float FP32Value);
75
76 /** Set to 0.0 **/
77 void SetZero()
78 {
79 Encoded = 0;
80 }
81
82 /** Set to 1.0 **/
83 void SetOne()
84 {
85 Encoded = 0x3c00;
86 }
87
88 /** Return float clamp in [0,MaxF16Float] , no negatives or infinites or nans returned **/
90
91 /** Return float clamp in [-MaxF16Float,MaxF16Float] , no infinites or nans returned **/
93
94 /** Convert from Fp16 to Fp32. */
95 float GetFloat() const;
96
97 /** Is the float negative without converting
98 NOTE: returns true for negative zero! */
99 bool IsNegative() const
100 {
101 // negative if sign bit is on
102 // can be tested with int compare
103 return (int16)Encoded < 0;
104 }
105
106 /**
107 * Serializes the FFloat16.
108 *
109 * @param Ar Reference to the serialization archive.
110 * @param V Reference to the FFloat16 being serialized.
111 *
112 * @return Reference to the Archive after serialization.
113 */
114 friend FArchive& operator<<(FArchive& Ar, FFloat16& V)
115 {
116 return Ar << V.Encoded;
117 }
118};
119template<> struct TCanBulkSerialize<FFloat16> { enum { Value = true }; };
120
122
124 : Encoded(0)
125{ }
126
127
129{
130 Encoded = FP16Value.Encoded;
131}
132
133
135{
136 Set(FP32Value);
137}
138
139
141{
142 Set(FP32Value);
143 return *this;
144}
145
146
148{
149 Encoded = FP16Value.Encoded;
150 return *this;
151}
152
153
155{
156 return GetFloat();
157}
158
159
160// NOTE: Set() on values out of F16 max range store them as +-Inf
161FORCEINLINE void FFloat16::Set(float FP32Value)
162{
163 // FPlatformMath::StoreHalf follows RTNE (round-to-nearest-even) rounding default convention
164 FPlatformMath::StoreHalf(&Encoded, FP32Value);
165}
166
167
168
170{
171 return FPlatformMath::LoadHalf(&Encoded);
172}
173
174
175// NOTE: SetTruncate() on values out of F16 max range store them as +-Inf
176FORCEINLINE void FFloat16::SetTruncate(float FP32Value)
177{
178
179 union
180 {
181 struct
182 {
184 uint16 Mantissa : 10;
185 uint16 Exponent : 5;
186 uint16 Sign : 1;
187#else
188 uint16 Sign : 1;
189 uint16 Exponent : 5;
190 uint16 Mantissa : 10;
191#endif
192 } Components;
193
194 uint16 Encoded;
195 } FP16;
196
197
198 FFloat32 FP32(FP32Value);
199
200 // Copy sign-bit
201 FP16.Components.Sign = FP32.Components.Sign;
202
203 // Check for zero, denormal or too small value.
204 if (FP32.Components.Exponent <= 112) // Too small exponent? (0+127-15)
205 {
206 // Set to 0.
207 FP16.Components.Exponent = 0;
208 FP16.Components.Mantissa = 0;
209
210 // Exponent unbias the single, then bias the halfp
211 const int32 NewExp = FP32.Components.Exponent - 127 + 15;
212
213 if ( (14 - NewExp) <= 24 ) // Mantissa might be non-zero
214 {
215 uint32 Mantissa = FP32.Components.Mantissa | 0x800000; // Hidden 1 bit
216 FP16.Components.Mantissa = (uint16)(Mantissa >> (14 - NewExp));
217 // Check for rounding
218 if ( (Mantissa >> (13 - NewExp)) & 1 ) //-V1051
219 {
220 FP16.Encoded++; // Round, might overflow into exp bit, but this is OK
221 }
222 }
223 }
224 // Check for INF or NaN, or too high value
225 else if (FP32.Components.Exponent >= 143) // Too large exponent? (31+127-15)
226 {
227 // Set to 65504.0 (max value)
228 FP16.Components.Exponent = 30;
229 FP16.Components.Mantissa = 1023;
230 }
231 // Handle normal number.
232 else
233 {
234 FP16.Components.Exponent = uint16(int32(FP32.Components.Exponent) - 127 + 15);
235 FP16.Components.Mantissa = uint16(FP32.Components.Mantissa >> 13);
236 }
237
238 Encoded = FP16.Encoded;
239}
240
241/** Return float clamp in [0,MaxF16Float] , no negatives or infinites or nans returned **/
243{
244 FFloat16 ReturnValue;
245
246 if ( Encoded < 0x7c00 ) // normal and non-negative, just pass through
247 ReturnValue.Encoded = Encoded;
248 else if ( Encoded == 0x7c00 ) // infinity turns into largest normal
249 ReturnValue.Encoded = 0x7bff;
250 else // NaNs or anything negative turns into 0
251 ReturnValue.Encoded = 0;
252
253 return ReturnValue;
254}
255
256
257/** Return float clamp in [-MaxF16Float,MaxF16Float] , no infinites or nans returned **/
259{
260 FFloat16 ReturnValue;
261
262 if ( (Encoded&0x7c00) == 0x7c00 )
263 {
264 // inf or nan
265 if ( Encoded == 0x7C00 ) //+inf
266 {
267 ReturnValue.Encoded = 0x7bff; // max finite
268 }
269 else if ( Encoded == 0xFC00 ) //-inf
270 {
271 ReturnValue.Encoded = 0xfbff; // max finite negative
272 }
273 else
274 {
275 // nan
276 ReturnValue.Encoded = 0;
277 }
278 }
279 else
280 {
281 ReturnValue.Encoded = Encoded;
282 }
283
284 return ReturnValue;
285}
#define DECLARE_INTRINSIC_TYPE_LAYOUT(T)
#define PLATFORM_LITTLE_ENDIAN
Definition Platform.h:144
#define FORCEINLINE
Definition Platform.h:644
FFloat16 GetClampedNonNegativeAndFinite() const
Definition Float16.h:242
FFloat16()
Definition Float16.h:123
static constexpr float MaxF16Float
Definition Float16.h:38
void SetZero()
Definition Float16.h:77
void Set(float FP32Value)
Definition Float16.h:161
void SetClamped(float FP32Value)
Definition Float16.h:66
FFloat16 GetClampedFinite() const
Definition Float16.h:258
FFloat16 & operator=(const FFloat16 &FP16Value)
Definition Float16.h:147
FFloat16 & operator=(float FP32Value)
Definition Float16.h:140
operator float() const
Definition Float16.h:154
FFloat16(const FFloat16 &FP16Value)
Definition Float16.h:128
uint16 Encoded
Definition Float16.h:40
void SetTruncate(float FP32Value)
Definition Float16.h:176
float GetFloat() const
Definition Float16.h:169
bool IsNegative() const
Definition Float16.h:99
FFloat16(float FP32Value)
Definition Float16.h:134
void SetOne()
Definition Float16.h:83
FFloat32(float InValue=0.0f)
Definition Float32.h:41
static UE_NODISCARD constexpr FORCEINLINE float Clamp(const float X, const float Min, const float Max)