RawSpeed
fast raw decoding library
Loading...
Searching...
No Matches
BitVacuumerJPEGBenchmark.cpp
Go to the documentation of this file.
1/*
2 RawSpeed - RAW file decoder.
3
4 Copyright (C) 2024 Roman Lebedev
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19*/
20
22#include "adt/Array1DRef.h"
23#include "adt/Casts.h"
26#include "adt/Optional.h"
28#include "bench/Common.h"
31#include "common/Common.h"
32#include <cassert>
33#include <cstddef>
34#include <cstdint>
35#include <iterator>
36#include <memory>
37#include <type_traits>
38#include <vector>
39#include <benchmark/benchmark.h>
40
41#ifndef NDEBUG
42#include <limits>
43#endif
44
45namespace rawspeed {
46
47namespace {
48
// Tag types identifying a bitstream flavor. They are only declared (no
// definition is visible here) and are used purely as template arguments.
49struct BitstreamFlavorMSB;
50struct BitstreamFlavorJPEG;
51
// Per-flavor type lookup, keyed on one of the tag types above. The primary
// template has no members, so a flavor without an explicit specialization
// offers nothing to look up.
52template <typename T> struct BitStreamRoundtripTypes final {};
53
// Specialization of the per-flavor type lookup for BitstreamFlavorMSB.
// NOTE(review): the declaration introduced by the `template <typename
// OutputIterator>` header below (original line 56) is absent from this
// listing. BM() looks up a member template named `vacuumer` on this type, so
// it is presumably that alias -- confirm against the real source.
54template <> struct BitStreamRoundtripTypes<BitstreamFlavorMSB> final {
55 template <typename OutputIterator>
57};
58
// Specialization of the per-flavor type lookup for BitstreamFlavorJPEG.
// NOTE(review): the declaration introduced by the `template <typename
// OutputIterator>` header below (original line 61) is absent from this
// listing. BM() looks up a member template named `vacuumer` on this type, so
// it is presumably that alias -- confirm against the real source.
59template <> struct BitStreamRoundtripTypes<BitstreamFlavorJPEG> final {
60 template <typename OutputIterator>
62};
63
// Compile-time description of the benchmark's output configuration:
//   coalescing - std::bool_constant<b>, the "coalesce or not" flag as a type.
//   value_type - T; BM() uses it as the element type of its output vector.
64template <bool b, typename T> struct C {
65 using coalescing = std::bool_constant<b>;
66 using value_type = T;
67};
// Shorthand for the coalescing-enabled configuration with chunk type T.
// NOTE(review): original line 68 is missing from this listing; GEN_T refers
// to a `NoCoalescing` name that is presumably declared there -- confirm.
69template <typename T> using CoalesceTo = C<true, T>;
70
// Overload selected when ShouldCoalesce is false: the underlying output
// iterator is handed back as-is (by value), with no wrapping.
71template <bool ShouldCoalesce, typename UnderlyingOutputIterator>
72 requires(!ShouldCoalesce)
73auto getMaybeCoalescingOutputIterator(UnderlyingOutputIterator e) {
74 return e;
75}
76
// Overload selected when ShouldCoalesce is true.
// NOTE(review): the body (original line 80) is missing from this listing.
// Presumably it returns `e` wrapped in a CoalescingOutputIterator -- confirm
// against the real source.
77template <bool ShouldCoalesce, typename UnderlyingOutputIterator>
78 requires(ShouldCoalesce)
79auto getMaybeCoalescingOutputIterator(UnderlyingOutputIterator e) {
81}
82
// Benchmark body: pushes every element of a generated input through a bit
// vacuumer, 8 bits per put() call, appending the result to a std::vector.
//
// Template parameters:
//   T - bitstream flavor tag; picks the vacuumer type through
//       BitStreamRoundtripTypes<T>::vacuumer.
//   C - output configuration; C::value_type is the output vector's element
//       type.
// Parameters:
//   state   - Google Benchmark state; state.range(0) is the requested input
//             size in bytes.
//   Stuffed - selects which of the two input generators is used.
83template <typename T, typename C>
84void BM(benchmark::State& state, bool Stuffed) {
85 int64_t numBytes = state.range(0);
 // The requested size must be positive and representable as an int.
86 assert(numBytes > 0);
87 assert(numBytes <= std::numeric_limits<int>::max());
88
 // NOTE(review): original lines 89-91 are missing from this listing. They
 // presumably declare `genStuffed`, `genUnstuffed` and `input`, which are
 // used with emplace() / operator-> / operator* below -- confirm.
92 if (Stuffed) {
93 genStuffed.emplace(numBytes, /*AppendStuffingByte=*/false);
 // From here on numBytes is what the generator reports it produced, not
 // what was requested.
94 numBytes = genStuffed->numBytesGenerated;
95 input = genStuffed->getInput();
96 } else {
97 genUnstuffed.emplace(numBytes);
98 numBytes = genUnstuffed->numBytesGenerated;
99 input = genUnstuffed->getInput();
100 }
101 benchmark::DoNotOptimize(input->begin());
102
103 using OutputChunkType = typename C::value_type;
104 std::vector<OutputChunkType,
105 DefaultInitAllocatorAdaptor<OutputChunkType,
106 std::allocator<OutputChunkType>>>
107 output;
 // Reserve roundUpDivisionSafe(input size, chunk size) elements once, before
 // the timed loop starts.
108 output.reserve(implicit_cast<size_t>(
109 roundUpDivisionSafe(input->size(), sizeof(OutputChunkType))));
110
111 for (auto _ : state) {
 // The same vector is reused by every iteration; start each one empty.
112 output.clear();
113
 // NOTE(review): original line 115 is missing from this listing; the
 // unbalanced parentheses suggest a call that wraps the back_inserter,
 // presumably getMaybeCoalescingOutputIterator<...>( -- confirm.
114 auto bsInserter = PartitioningOutputIterator(
116 std::back_inserter(output)));
117 using BitVacuumer = typename BitStreamRoundtripTypes<T>::template vacuumer<
118 decltype(bsInserter)>;
119 auto bv = BitVacuumer(bsInserter);
120
 // Every input element is written with a fixed width of 8 bits. Both the
 // value and the width go through DoNotOptimize before the put().
121 int count = 8;
122 for (auto bits : *input) {
123 benchmark::DoNotOptimize(bits);
124 benchmark::DoNotOptimize(count);
125 bv.put(bits, count);
126 }
127 }
128
 // Report against the generated byte count. Both counters share the same
 // value (bytes per iteration); "Latency" additionally sets kInvert, and
 // the two use different unit bases (kIs1024 vs kIs1000).
129 state.SetComplexityN(numBytes);
130 state.counters.insert({
131 {"Throughput",
132 benchmark::Counter(sizeof(uint8_t) * state.complexity_length_n(),
133 benchmark::Counter::Flags::kIsIterationInvariantRate,
134 benchmark::Counter::kIs1024)},
135 {"Latency",
136 benchmark::Counter(sizeof(uint8_t) * state.complexity_length_n(),
137 benchmark::Counter::Flags::kIsIterationInvariantRate |
138 benchmark::Counter::Flags::kInvert,
139 benchmark::Counter::kIs1000)},
140 });
141}
142
// Argument setup applied to every registered benchmark (see GEN): sets the
// reporting unit and range multiplier, then chooses the input size(s) in
// bytes that BM() reads via state.range(0).
143void CustomArguments(benchmark::internal::Benchmark* b) {
144 b->Unit(benchmark::kMicrosecond);
145 b->RangeMultiplier(2);
146
 // 32 KiB, 512 KiB and 4 * 512 KiB = 2 MiB respectively.
 // NOTE(review): the names suggest typical L1d/L2 cache sizes; nothing here
 // queries the actual hardware.
147 static constexpr int L1dByteSize = 32U * (1U << 10U);
148 static constexpr int L2dByteSize = 512U * (1U << 10U);
149 static constexpr int MaxBytesOptimal = L2dByteSize * (1U << 2);
150
 // Dry run: register only the single small size and stop.
151 if (benchmarkDryRun()) {
152 b->Arg(L1dByteSize);
153 return;
154 }
155
 // The condition is the literal `true`, so only the single-size branch is
 // active. The else branch (a sweep from 8 bytes up to 4 * MaxBytesOptimal
 // with O(N) complexity fitting) is kept in the source but never taken.
156 if constexpr ((true)) {
157 b->Arg(MaxBytesOptimal);
158 } else {
159 b->Range(8, MaxBytesOptimal * (1U << 2));
160 b->Complexity(benchmark::oN);
161 }
162}
163
164// NOLINTBEGIN(cppcoreguidelines-macro-usage)
165
// Fallback definition, used only if BENCHMARK_TEMPLATE2_CAPTURE is not
// already defined at this point. It registers a benchmark named
// "func<a,b>/test_case_name" whose body calls func<a, b>(state, extra args...),
// where the extra args are the macro's trailing variadic arguments.
166#ifndef BENCHMARK_TEMPLATE2_CAPTURE
167#define BENCHMARK_TEMPLATE2_CAPTURE(func, a, b, test_case_name, ...) \
168 BENCHMARK_PRIVATE_DECLARE(func) = \
169 (::benchmark::internal::RegisterBenchmarkInternal( \
170 new ::benchmark::internal::FunctionBenchmark( \
171 #func "<" #a "," #b ">" \
172 "/" #test_case_name, \
173 [](::benchmark::State& st) { func<a, b>(st, __VA_ARGS__); })))
174#endif // BENCHMARK_TEMPLATE2_CAPTURE
175
// GEN(A, B, C, D): registers BM<A, B> under test-case name C, passing D as
// BM's trailing (bool Stuffed) argument, and applies CustomArguments to it.
176// NOLINTNEXTLINE(bugprone-macro-parentheses)
177#define GEN(A, B, C, D) \
178 BENCHMARK_TEMPLATE2_CAPTURE(BM, A, B, C, D)->Apply(CustomArguments)
179
// GEN_T(A, C, D): expands GEN once per output configuration -- NoCoalescing,
// then CoalesceTo<> with 16-, 32- and 64-bit unsigned chunk types -- so each
// use registers four benchmarks for flavor A.
180#define GEN_T(A, C, D) \
181 GEN(A, NoCoalescing, C, D); \
182 GEN(A, CoalesceTo<uint16_t>, C, D); \
183 GEN(A, CoalesceTo<uint32_t>, C, D); \
184 GEN(A, CoalesceTo<uint64_t>, C, D)
185
// Registered combinations: the JPEG flavor with Stuffed = true and with
// Stuffed = false, and the MSB flavor with Stuffed = false only. Each line
// expands to four benchmarks (one per output configuration in GEN_T).
186GEN_T(BitstreamFlavorJPEG, Stuffed, true);
187GEN_T(BitstreamFlavorJPEG, Unstuffed, false);
188GEN_T(BitstreamFlavorMSB, Unstuffed, false);
189
190// NOLINTEND(cppcoreguidelines-macro-usage)
191
192} // namespace
193
194} // namespace rawspeed
195
BENCHMARK_MAIN()
assert(dim.area() >=area)
bool RAWSPEED_READNONE benchmarkDryRun()
T & emplace(Args &&... args)
Definition Optional.h:51
constexpr uint64_t RAWSPEED_READNONE roundUpDivisionSafe(uint64_t value, uint64_t div)
Definition Common.h:145
constexpr RAWSPEED_READNONE Ttgt implicit_cast(Tsrc value)
Definition Casts.h:32
throw T(buf.data())
CoalescingOutputIterator(T) -> CoalescingOutputIterator< T >
PartitioningOutputIterator(T) -> PartitioningOutputIterator< T >