RawSpeed
fast raw decoding library
Loading...
Searching...
No Matches
FloatingPoint.h
Go to the documentation of this file.
1/*
2 RawSpeed - RAW file decoder.
3
4 Copyright (C) 2017 Vasily Khoruzhick
5 Copyright (C) 2020 Roman Lebedev
6
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
11
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with this library; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20*/
21
22#pragma once
23
24#include <cstdint>
25
26namespace rawspeed {
27
28namespace ieee_754_2008 {
29
30// Refer to "3.6 Interchange format parameters",
31// "Table 3.5—Binary interchange format parameters"
32
33// All formats are:
34// MSB [Sign bit] [Exponent bits] [Fraction bits] LSB
35
// Compile-time description of one binary interchange format.
//
// Template parameters (all in bits):
//   StorageWidth_  - total width of the encoding
//   FractionWidth_ - width of the trailing fraction (significand) field
//   ExponentWidth_ - width of the biased exponent field
//
// The members expose the field widths, the field positions (bit index of the
// least significant bit of each field) and the derived parameters
// (Precision, ExponentMax, Bias).
template <int StorageWidth_, int FractionWidth_, int ExponentWidth_>
struct BinaryN {
  static constexpr uint32_t StorageWidth = StorageWidth_;

  // FIXME: if we had compile-time log2/round, we'd only need StorageWidth.

  static constexpr uint32_t FractionWidth = FractionWidth_;
  static constexpr uint32_t ExponentWidth = ExponentWidth_;
  // SignWidth is always 1.
  static_assert(FractionWidth + ExponentWidth + 1 == StorageWidth);

  // The significand has one more bit than is stored in the fraction field.
  static constexpr uint32_t Precision = FractionWidth + 1;

  static constexpr uint32_t ExponentMax = (1 << (ExponentWidth - 1)) - 1;

  static constexpr int32_t Bias = ExponentMax;

  // FractionPos is always 0.
  // The exponent field sits directly above the fraction field.
  static constexpr uint32_t ExponentPos = FractionWidth;
  // The sign bit is the most significant bit of the encoding.
  static constexpr uint32_t SignBitPos = StorageWidth - 1;
};
57
58// IEEE-754-2008: binary16:
59// bits 9-0 - fraction (10 bit)
60// bits 14-10 - exponent (5 bit)
61// bit 15 - sign
62struct Binary16 final
63 : public BinaryN</*StorageWidth=*/16, /*FractionWidth=*/10,
64 /*ExponentWidth=*/5> {
65 static_assert(Precision == 11);
66 static_assert(ExponentMax == 15);
67 static_assert(ExponentPos == 10);
68 static_assert(SignBitPos == 15);
69};
70
71// IEEE-754-2008: binary24:
72// bits 15-0 - fraction (16 bit)
73// bits 22-16 - exponent (7 bit)
74// bit 23 - sign
75struct Binary24 final
76 : public BinaryN</*StorageWidth=*/24, /*FractionWidth=*/16,
77 /*ExponentWidth=*/7> {
78 static_assert(Precision == 17);
79 static_assert(ExponentMax == 63);
80 static_assert(ExponentPos == 16);
81 static_assert(SignBitPos == 23);
82};
83
84// IEEE-754-2008: binary32:
85// bits 22-0 - fraction (23 bit)
86// bits 30-23 - exponent (8 bit)
87// bit 31 - sign
88struct Binary32 final
89 : public BinaryN</*StorageWidth=*/32, /*FractionWidth=*/23,
90 /*ExponentWidth=*/8> {
91 static_assert(Precision == 24);
92 static_assert(ExponentMax == 127);
93 static_assert(ExponentPos == 23);
94 static_assert(SignBitPos == 31);
95};
96
// exp = 0, fract = 0: +-zero
// exp = 0, fract != 0: subnormal numbers
//   equation: (-1) ^ sign * 2 ^ (1 - Bias) * 0.fraction
// exp = 1..(2^ExponentWidth - 2): normalized value
//   equation: (-1) ^ sign * 2 ^ (exponent - Bias) * 1.fraction
// exp = 2^ExponentWidth - 1, fract = 0: +-infinity
// exp = 2^ExponentWidth - 1, fract != 0: NaN
104
105} // namespace ieee_754_2008
106
// Re-encodes a floating-point bit pattern from the NarrowType layout into the
// WideType layout, preserving the represented value.
//
// NarrowType and WideType must provide the format constants StorageWidth,
// FractionWidth, ExponentWidth, ExponentPos, SignBitPos and Bias.
//
// narrow  - the NarrowType bit pattern, stored in the low bits.
// returns - the corresponding WideType bit pattern.
//
// Zeros, infinities and NaNs map onto the same class in the wide format (the
// NaN payload is kept, moved to the top of the wide fraction). Narrow
// subnormals are renormalized into normalized wide values.
template <typename NarrowType, typename WideType>
uint32_t extendBinaryFloatingPoint(uint32_t narrow) {
  constexpr uint32_t NarrowFractionWidth = NarrowType::FractionWidth;
  constexpr uint32_t NarrowExponentWidth = NarrowType::ExponentWidth;
  constexpr uint32_t WideFractionWidth = WideType::FractionWidth;
  constexpr uint32_t WideExponentWidth = WideType::ExponentWidth;
  constexpr int32_t NarrowBias = NarrowType::Bias;
  constexpr int32_t WideBias = WideType::Bias;

  // Preconditions that the algorithm below silently relies on.
  static_assert(NarrowType::StorageWidth <= 32 && WideType::StorageWidth <= 32,
                "both encodings must fit into uint32_t");
  static_assert(WideFractionWidth >= NarrowFractionWidth,
                "the fraction is widened by a left shift");
  static_assert(WideExponentWidth >= NarrowExponentWidth,
                "every narrow exponent must be representable");
  // Renormalizing a narrow subnormal lowers the exponent by up to
  // NarrowFractionWidth steps; the result must still be a normalized
  // (biased exponent >= 1) wide value.
  static_assert(WideBias - NarrowBias >=
                    static_cast<int32_t>(NarrowFractionWidth),
                "narrow subnormals must be normalizable in the wide format");

  constexpr uint32_t NarrowExponentMask =
      (uint32_t{1} << NarrowExponentWidth) - 1;
  constexpr uint32_t NarrowFractionMask =
      (uint32_t{1} << NarrowFractionWidth) - 1;
  constexpr uint32_t WideExponentMask = (uint32_t{1} << WideExponentWidth) - 1;
  // The bit just above the wide fraction field: the implicit leading one.
  constexpr uint32_t WideImplicitBit = uint32_t{1} << WideFractionWidth;

  // Split the narrow encoding into its three fields.
  const uint32_t sign = (narrow >> NarrowType::SignBitPos) & 1;
  const uint32_t narrow_exponent =
      (narrow >> NarrowType::ExponentPos) & NarrowExponentMask;
  const uint32_t narrow_fraction = narrow & NarrowFractionMask;

  // Default: normalized value. Re-bias the exponent and left-align the
  // fraction within the wider field.
  uint32_t wide_exponent = static_cast<uint32_t>(
      static_cast<int32_t>(narrow_exponent) - NarrowBias + WideBias);
  uint32_t wide_fraction =
      narrow_fraction << (WideFractionWidth - NarrowFractionWidth);

  if (narrow_exponent == NarrowExponentMask) {
    // Infinity or NaN: all-ones exponent.
    // Narrow fraction is kept/widened!
    wide_exponent = WideExponentMask;
  } else if (narrow_exponent == 0) {
    if (narrow_fraction == 0) {
      // +-Zero
      wide_exponent = 0;
      wide_fraction = 0;
    } else {
      // Subnormal: 0.fraction * 2^(1 - NarrowBias).
      // Shift the fraction up until its leading one lands on the implicit
      // bit, decrementing the exponent for each shift. The loop terminates
      // because the fraction is non-zero, and the exponent stays >= 1 by
      // the static_assert above.
      wide_exponent = static_cast<uint32_t>(1 - NarrowBias + WideBias);
      while ((wide_fraction & WideImplicitBit) == 0) {
        wide_exponent -= 1;
        wide_fraction <<= 1;
      }
      // Drop the now-implicit leading one.
      wide_fraction &= WideImplicitBit - 1;
    }
  }

  return (sign << WideType::SignBitPos) |
         (wide_exponent << WideType::ExponentPos) | wide_fraction;
}
146
147// Expand IEEE-754-2008 binary16 into float32
152
153// Expand IEEE-754-2008 binary24 into float32
158
159} // namespace rawspeed
uint32_t fp24ToFloat(uint32_t fp24)
uint32_t fp16ToFloat(uint16_t fp16)
uint32_t extendBinaryFloatingPoint(uint32_t narrow)
static constexpr uint32_t ExponentWidth
static constexpr uint32_t StorageWidth
static constexpr int32_t Bias
static constexpr uint32_t ExponentPos
static constexpr uint32_t Precision
static constexpr uint32_t SignBitPos
static constexpr uint32_t FractionWidth
static constexpr uint32_t ExponentMax