RawSpeed
fast raw decoding library
Loading...
Searching...
No Matches
BitVacuumerBenchmark.cpp
Go to the documentation of this file.
1/*
2 RawSpeed - RAW file decoder.
3
4 Copyright (C) 2024 Roman Lebedev
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19*/
20
21#include "adt/Bit.h"
22#include "adt/Array1DRef.h"
23#include "adt/Casts.h"
26#include "adt/Invariant.h"
28#include "bench/Common.h"
33#include "common/Common.h"
34#include <cassert>
35#include <climits>
36#include <cstddef>
37#include <cstdint>
38#include <iterator>
39#include <memory>
40#include <random>
41#include <type_traits>
42#include <vector>
43#include <benchmark/benchmark.h>
44
45#ifndef NDEBUG
46#include <limits>
47#endif
48
49namespace rawspeed {
50
51namespace {
52
// Tag types that name the bitstream flavors being benchmarked. They are only
// forward-declared in the visible listing and are used purely as template
// arguments (for the specializations below and for the GEN_T registrations).
53struct BitstreamFlavorLSB;
54struct BitstreamFlavorMSB;
55struct BitstreamFlavorMSB16;
56struct BitstreamFlavorMSB32;
57
// Primary template: deliberately empty. Each flavor tag gets its own explicit
// specialization.
58template <typename T> struct BitStreamRoundtripTypes final {};
59
// Explicit specialization for the LSB flavor tag.
// The listing skips from line 61 to line 63, so the member declaration that
// follows the `template <typename OutputIterator>` header is not visible.
// NOTE(review): BM() names `BitStreamRoundtripTypes<T>::template vacuumer<...>`,
// so the missing line presumably declares a member template `vacuumer` --
// confirm against the real file.
60template <> struct BitStreamRoundtripTypes<BitstreamFlavorLSB> final {
61 template <typename OutputIterator>
63};
64
// Explicit specialization for the MSB flavor tag.
// The listing skips from line 66 to line 68, so the member declaration that
// follows the `template <typename OutputIterator>` header is not visible.
// NOTE(review): presumably a member template `vacuumer` (the name BM() looks
// up) -- confirm against the real file.
65template <> struct BitStreamRoundtripTypes<BitstreamFlavorMSB> final {
66 template <typename OutputIterator>
68};
69
// Explicit specialization for the MSB16 flavor tag.
// The listing skips from line 71 to line 73, so the member declaration that
// follows the `template <typename OutputIterator>` header is not visible.
// NOTE(review): presumably a member template `vacuumer` (the name BM() looks
// up) -- confirm against the real file.
70template <> struct BitStreamRoundtripTypes<BitstreamFlavorMSB16> final {
71 template <typename OutputIterator>
73};
74
// Explicit specialization for the MSB32 flavor tag.
// The listing skips from line 76 to line 78, so the member declaration that
// follows the `template <typename OutputIterator>` header is not visible.
// NOTE(review): presumably a member template `vacuumer` (the name BM() looks
// up) -- confirm against the real file.
75template <> struct BitStreamRoundtripTypes<BitstreamFlavorMSB32> final {
76 template <typename OutputIterator>
78};
79
// Data members of BitVectorLengthsGenerator (the struct header, listing line
// 80, is not present in this listing).
//
// Storage for the generated bit lengths. Only the first line of the
// declaration survives here (listing lines 82-83 are absent); the
// cross-reference at the end of the page gives the full declaration as
//   std::vector<int8_t, DefaultInitAllocatorAdaptor<int8_t,
//               std::allocator<int8_t>>> dataStorage
81 std::vector<int8_t,
84
// Running total of all lengths appended to dataStorage by the constructor;
// BM() reads it when sizing its output buffer.
85 int64_t numBitsToProduce = 0;
86
// Returns a read-only Array1DRef view over dataStorage, built from the
// vector's data pointer and its element count. The count is converted from
// size_t to int through implicit_cast. The view does not own the data, so it
// is only valid while this generator (and its dataStorage) is alive and
// unmodified.
87 [[nodiscard]] Array1DRef<const int8_t> getInput() const {
88 return {dataStorage.data(), implicit_cast<int>(dataStorage.size())};
89 }
90
// The push/pop pair scopes the suppressions below to the constructor only.
// The first three entries silence complaints about pragmas/options the
// compiler does not recognize; the last two silence the frame-size and
// stack-usage diagnostics.
// NOTE(review): presumably the unknown-option entries exist because not every
// supported compiler knows both -Wframe-larger-than= and -Wstack-usage= --
// confirm.
91#pragma GCC diagnostic push
92#pragma GCC diagnostic ignored "-Wpragmas"
93#pragma GCC diagnostic ignored "-Wunknown-warning-option"
94#pragma GCC diagnostic ignored "-Wunknown-pragmas"
95#pragma GCC diagnostic ignored "-Wframe-larger-than="
96#pragma GCC diagnostic ignored "-Wstack-usage="
// Constructor. Listing line 97 (the start of the signature) is absent; the
// cross-reference at the end of the page gives the full signature as
//   __attribute__((noinline)) explicit
//   BitVectorLengthsGenerator(const int64_t maxBytes)
//
// Fills dataStorage with random bit lengths and accumulates their sum in
// numBitsToProduce.
//
// maxBytes: size budget in bytes; must be positive (checked by invariant()).
98 const int64_t maxBytes) {
99 invariant(maxBytes > 0);
100
// Each generated length is drawn uniformly from the closed range [0, 32].
101 std::uniform_int_distribution<> dist(0, 32);
102
// The engine is seeded from std::random_device, not from a fixed seed.
103 std::random_device rd;
104 std::mt19937_64 gen(rd());
105
// Keep generating until the tracked bit count, rounded up to whole bytes,
// reaches maxBytes. There is no increment expression: numBits is advanced
// inside the body.
106 for (int64_t numBits = 0; implicit_cast<int64_t>(roundUpDivisionSafe(
107 numBits, CHAR_BIT)) < maxBytes;) {
108 int len = dist(gen);
109 numBitsToProduce += len;
// len is at most 32, so it fits in the int8_t element type.
110 dataStorage.emplace_back(len);
// The budget is charged bitwidth<int8_t>() plus len bits per entry.
// NOTE(review): presumably the bitwidth<int8_t>() term accounts for the
// stored length element itself -- confirm intent.
111 numBits += bitwidth<int8_t>();
112 numBits += len;
113 }
114 }
115#pragma GCC diagnostic pop
116};
117
// Compile-time configuration bundle used as the second template argument of
// BM(): `coalescing` is a std::bool_constant carrying the flag `b`, and
// `value_type` is the chunk type T.
118template <bool b, typename T> struct C {
119 using coalescing = std::bool_constant<b>;
120 using value_type = T;
121};
// Shorthand for a configuration with the flag set to true and chunk type T.
// Listing line 122 is absent here.
// NOTE(review): `NoCoalescing`, which GEN_T uses, is presumably declared on
// that missing line -- confirm against the real file.
123template <typename T> using CoalesceTo = C<true, T>;
124
// Overload chosen when ShouldCoalesce is false (the requires-clause makes the
// two overloads mutually exclusive): hands the given output iterator back
// unchanged, by value.
125template <bool ShouldCoalesce, typename UnderlyingOutputIterator>
126 requires(!ShouldCoalesce)
127auto getMaybeCoalescingOutputIterator(UnderlyingOutputIterator e) {
128 return e;
129}
130
// Overload chosen when ShouldCoalesce is true: wraps the given output iterator
// in a CoalescingOutputIterator. The wrapper's template argument is deduced
// from `e` (the page cross-references list the deduction guide
// `CoalescingOutputIterator(T) -> CoalescingOutputIterator<T>`).
131template <bool ShouldCoalesce, typename UnderlyingOutputIterator>
132 requires(ShouldCoalesce)
133auto getMaybeCoalescingOutputIterator(UnderlyingOutputIterator e) {
134 return CoalescingOutputIterator(e);
135}
136
// Benchmark body.
//
// T: bitstream flavor tag; selects the vacuumer type through
//    BitStreamRoundtripTypes<T>.
// C: configuration bundle; C::value_type is the element type of the output
//    buffer.
// state: Google Benchmark state; state.range(0) is the input size budget in
//    bytes.
137template <typename T, typename C> void BM(benchmark::State& state) {
138 int64_t numBytes = state.range(0);
// Debug-only sanity checks: the budget must be positive and fit in an int.
139 assert(numBytes > 0);
140 assert(numBytes <= std::numeric_limits<int>::max());
141
// Build the random list of bit lengths once, outside the timed loop.
142 const BitVectorLengthsGenerator gen(numBytes);
143 const Array1DRef<const int8_t> input = gen.getInput();
144 benchmark::DoNotOptimize(input.begin());
145
146 using OutputChunkType = typename C::value_type;
147 std::vector<OutputChunkType,
148 DefaultInitAllocatorAdaptor<OutputChunkType,
149 std::allocator<OutputChunkType>>>
150 output;
// Listing line 151 (the head of this statement) is absent. The visible tail
// passes the total bit count and the number of bits per output chunk.
// NOTE(review): presumably this is
// `output.reserve(implicit_cast<size_t>(roundUpDivisionSafe(` ... -- confirm
// against the real file.
152 gen.numBitsToProduce, CHAR_BIT * sizeof(OutputChunkType))));
153
// Timed region: one full pass over all lengths per benchmark iteration.
154 for (auto _ : state) {
// Drop the previous iteration's contents (clear() keeps the capacity).
155 output.clear();
156
// Output iterator chain ending in a back_inserter on `output`. Listing line
// 158 (the middle of this expression) is absent.
// NOTE(review): presumably it calls getMaybeCoalescingOutputIterator with
// C::coalescing -- confirm against the real file.
157 auto bsInserter = PartitioningOutputIterator(
159 std::back_inserter(output)));
160 using BitVacuumer = typename BitStreamRoundtripTypes<T>::template vacuumer<
161 decltype(bsInserter)>;
162 auto bv = BitVacuumer(bsInserter);
163
// The payload is always zero; only the lengths vary. DoNotOptimize keeps
// the compiler from treating `bits` as a known constant.
164 int bits = 0;
165 for (const auto& len : input) {
166 benchmark::DoNotOptimize(bits);
167 bv.put(bits, len);
168 }
169 }
170
// Report the size, in bytes, of what is left in `output` after the final
// iteration as the complexity N.
171 state.SetComplexityN(sizeof(typename decltype(output)::value_type) *
172 output.size());
// Both counters are built from the same byte count. "Throughput" is an
// iteration-invariant rate shown with 1024-based units; "Latency" adds
// kInvert to the same flags and is shown with 1000-based units.
173 state.counters.insert({
174 {"Throughput",
175 benchmark::Counter(sizeof(uint8_t) * state.complexity_length_n(),
176 benchmark::Counter::Flags::kIsIterationInvariantRate,
177 benchmark::Counter::kIs1024)},
178 {"Latency",
179 benchmark::Counter(sizeof(uint8_t) * state.complexity_length_n(),
180 benchmark::Counter::Flags::kIsIterationInvariantRate |
181 benchmark::Counter::Flags::kInvert,
182 benchmark::Counter::kIs1000)},
183 });
184}
185
// Configures the argument set of a registered benchmark; applied to every
// registration through the GEN macro.
//
// b: the benchmark being configured.
186void CustomArguments(benchmark::internal::Benchmark* b) {
// Report times in microseconds; Range() sweeps step by a factor of 2.
187 b->Unit(benchmark::kMicrosecond);
188 b->RangeMultiplier(2);
189
// Size constants in bytes: 32 KiB, 512 KiB, and 8 * 512 KiB = 4 MiB.
// NOTE(review): the names suggest typical L1d/L2 cache sizes; they are
// hard-coded, not queried from the machine.
190 static constexpr int L1dByteSize = 32U * (1U << 10U);
191 static constexpr int L2dByteSize = 512U * (1U << 10U);
192 static constexpr int MaxBytesOptimal = L2dByteSize * (1U << 3);
193
// Dry run: register only the single 32 KiB size and stop.
194 if (benchmarkDryRun()) {
195 b->Arg(L1dByteSize);
196 return;
197 }
198
// Hard-wired compile-time toggle. As written, only the single 4 MiB size is
// registered. Flipping the literal selects the else-branch instead, which
// sweeps sizes from 8 bytes up to 4 * MaxBytesOptimal (16 MiB) and requests
// an O(N) complexity fit.
199 if constexpr ((true)) {
200 b->Arg(MaxBytesOptimal);
201 } else {
202 b->Range(8, MaxBytesOptimal * (1U << 2));
203 b->Complexity(benchmark::oN);
204 }
205}
206
// Benchmark registration. The macros are wrapped in NOLINTBEGIN/NOLINTEND to
// suppress the cppcoreguidelines-macro-usage lint for this section.
207// NOLINTBEGIN(cppcoreguidelines-macro-usage)
208
// GEN(A, B): registers BM<A, B> and configures it with CustomArguments.
209#define GEN(A, B) BENCHMARK_TEMPLATE2(BM, A, B)->Apply(CustomArguments)
210
// GEN_T(A): registers flavor A with four configurations -- NoCoalescing and
// CoalesceTo<> for uint16_t, uint32_t and uint64_t. The last GEN has no
// trailing semicolon; the use site supplies it.
211#define GEN_T(A) \
212 GEN(A, NoCoalescing); \
213 GEN(A, CoalesceTo<uint16_t>); \
214 GEN(A, CoalesceTo<uint32_t>); \
215 GEN(A, CoalesceTo<uint64_t>)
216
// Four flavors times four configurations: sixteen registered benchmarks.
217GEN_T(BitstreamFlavorLSB);
218GEN_T(BitstreamFlavorMSB);
219GEN_T(BitstreamFlavorMSB16);
220GEN_T(BitstreamFlavorMSB32);
221
222// NOLINTEND(cppcoreguidelines-macro-usage)
223
224} // namespace
225
226} // namespace rawspeed
227
// Google Benchmark macro that supplies the program's main(); placed at global
// scope, after the rawspeed namespace has been closed.
BENCHMARK_MAIN()
constexpr uint64_t RAWSPEED_READNONE roundUpDivisionSafe(uint64_t value, uint64_t div)
Definition Common.h:145
#define invariant(expr)
Definition Invariant.h:27
assert(dim.area() >=area)
bool RAWSPEED_READNONE benchmarkDryRun()
constexpr uint64_t RAWSPEED_READNONE roundUpDivisionSafe(uint64_t value, uint64_t div)
Definition Common.h:145
constexpr RAWSPEED_READNONE Ttgt implicit_cast(Tsrc value)
Definition Casts.h:32
throw T(buf.data())
CoalescingOutputIterator(T) -> CoalescingOutputIterator< T >
constexpr unsigned RAWSPEED_READNONE bitwidth(T unused={})
Definition Bit.h:43
PartitioningOutputIterator(T) -> PartitioningOutputIterator< T >
constexpr RAWSPEED_READNONE Ttgt implicit_cast(Tsrc value)
Definition Casts.h:32
__attribute__((noinline)) explicit BitVectorLengthsGenerator(const int64_t maxBytes)
std::vector< int8_t, DefaultInitAllocatorAdaptor< int8_t, std::allocator< int8_t > > > dataStorage