packedfloat.h 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. #pragma once
  2. #include <util/generic/cast.h>
  3. #include <util/generic/ylimits.h>
  4. #include <util/system/hi_lo.h>
  5. #include <cmath>
  6. #include <cfloat>
  7. #include <limits>
  8. #include <algorithm>
  9. #include <cassert>
  10. namespace NPackedFloat {
  /*
   * IEEE 754 single-precision encoding, for reference:
   *
   *   Exponent     Mantissa zero   Mantissa non-zero   Equation
   *   0x00         zero            denormal            (-1)^sign * 2^-126 * 0.mantissa
   *   0x01-0xfe    normalized value                    (-1)^sign * 2^(exponent - 127) * 1.mantissa
   *   0xff         infinity        NaN
   */
  17. //fast 16 bit floats by melkov
  18. template <ui8 SIGNED>
  19. struct float16 {
  20. private:
  21. typedef float16<SIGNED> self;
  22. public:
  23. ui16 val;
  24. explicit float16(ui16 v = 0)
  25. : val(v)
  26. {
  27. }
  28. self& operator=(float t) {
  29. assert(SIGNED == 1 || SIGNED == 0 && t >= 0.);
  30. val = BitCast<ui32>(t) >> (15 + SIGNED);
  31. return *this;
  32. }
  33. operator float() const {
  34. return BitCast<float>((ui32)val << (15 + SIGNED));
  35. }
  36. static self New(float v) {
  37. self f;
  38. return f = v;
  39. }
  40. static self denorm_min() {
  41. return self(0x0001);
  42. }
  43. static self min() {
  44. return self(SIGNED ? 0x0080 : 0x0100);
  45. }
  46. static self max() {
  47. return self(SIGNED ? 0x7f7f : 0xfeff);
  48. }
  49. };
  50. //fast 8 bit floats
  51. template <ui8 SIGNED, ui8 DENORM = 0>
  52. struct float8 {
  53. private:
  54. typedef float8<SIGNED, DENORM> self;
  55. enum {
  56. FMinExp = SIGNED ? 0x7c : 0x78,
  57. FMaxExp = SIGNED ? 0x83 : 0x87,
  58. MaxExp = SIGNED ? 0x70 : 0xf0,
  59. };
  60. public:
  61. ui8 val;
  62. explicit float8(ui8 v = 0)
  63. : val(v)
  64. {
  65. }
  66. self& operator=(float t) {
  67. assert(SIGNED == 1 || SIGNED == 0 && t >= 0.);
  68. ui16 hi16 = Hi16(t);
  69. ui8 sign = SIGNED ? Hi8(hi16) & 0x80 : 0;
  70. hi16 <<= 1;
  71. ui8 fexp = Hi8(hi16);
  72. ui8 exp;
  73. ui8 frac = (Lo8(hi16) & 0xf0) >> 4;
  74. if (fexp <= FMinExp) {
  75. exp = 0;
  76. frac = DENORM ? ((ui8)(0x10 | frac) >> std::min<int>((FMinExp - fexp + 1), 8)) : 0;
  77. } else if (fexp > FMaxExp) {
  78. exp = MaxExp;
  79. frac = 0x0f;
  80. } else {
  81. exp = (fexp - FMinExp) << 4;
  82. }
  83. val = sign | exp | frac;
  84. return *this;
  85. }
  86. operator float() const {
  87. ui32 v = 0;
  88. v |= SIGNED ? (val & 0x80) << 24 : 0;
  89. ui8 frac = val & 0x0f;
  90. ui8 exp = val & MaxExp;
  91. if (exp) {
  92. v |= ((exp >> 4) + FMinExp) << 23 | frac << 19;
  93. } else if (DENORM && val & 0x0f) {
  94. while (!(frac & 0x10)) {
  95. frac <<= 1;
  96. ++exp;
  97. }
  98. v |= (FMinExp - exp + 1) << 23 | (frac & 0x0f) << 19;
  99. } else
  100. v |= 0;
  101. return BitCast<float>(v);
  102. }
  103. static self New(float v) {
  104. self f;
  105. return f = v;
  106. }
  107. static self denorm_min() {
  108. return self(0x01);
  109. }
  110. static self min() {
  111. return self(0x10);
  112. }
  113. static self max() {
  114. return self(SIGNED ? 0x7f : 0xff);
  115. }
  116. };
  117. }
  // Short fixed-width names for the built-in floating-point types.
  118. using f64 = double;
  119. using f32 = float;
  // Refuse to build where float/double do not have the sizes the names promise.
  120. static_assert(sizeof(f32) == 4, "expect sizeof(f32) == 4");
  121. static_assert(sizeof(f64) == 8, "expect sizeof(f64) == 8");
  // Packed float aliases: f* pass SIGNED = 1, uf* pass SIGNED = 0;
  // the d suffix additionally passes DENORM = 1 to float8.
  122. using f16 = NPackedFloat::float16<1>;
  123. using uf16 = NPackedFloat::float16<0>;
  124. using f8 = NPackedFloat::float8<1>;
  125. using uf8 = NPackedFloat::float8<0>;
  126. using f8d = NPackedFloat::float8<1, 1>;
  127. using uf8d = NPackedFloat::float8<0, 1>;
  128. // [0,1] value stored as an integer count of 1/Max<T>() steps (1/255s for frac8).
  129. using frac8 = ui8;
  130. using frac16 = ui16;
  131. template <class T>
  132. inline constexpr T Float2Frac(float fac) {
  133. return T(fac * float(Max<T>()));
  134. }
  135. template <class T>
  136. inline constexpr T Float2FracR(float fac) {
  137. float v = fac * float(Max<T>());
  138. return T(v + 0.5f);
  139. }
  140. template <class T>
  141. inline constexpr float Frac2Float(T pf) {
  142. constexpr float multiplier = float(1.0 / Max<T>());
  143. return pf * multiplier;
  144. }
  145. class TUi82FloatMapping {
  146. private:
  147. float Mapping[Max<ui8>() + 1] = {};
  148. public:
  149. constexpr TUi82FloatMapping() noexcept {
  150. for (ui32 i = 0; i < Y_ARRAY_SIZE(Mapping); ++i) {
  151. Mapping[i] = static_cast<float>(i) / Max<ui8>();
  152. }
  153. }
  154. inline float operator [] (ui8 index) const {
  155. return Mapping[index];
  156. }
  157. };
  158. constexpr TUi82FloatMapping Ui82FloatMapping{};
  159. template <>
  160. inline float Frac2Float(ui8 pf) {
  161. return Ui82FloatMapping[pf];
  162. }
  163. // Probably you don't want to use it, since sizeof(float) == sizeof(ui32)
  164. template <>
  165. inline float Frac2Float(ui32 pf) = delete;
  // Generic case: the argument is an integer fraction, so convert it with Frac2Float.
  template <class T>
  inline float FracOrFloatToFloat(T t) {
      const float converted = Frac2Float(t);
      return converted;
  }
  170. template <>
  171. inline float FracOrFloatToFloat<float>(float t) {
  172. return t;
  173. }