RawSpeed
fast raw decoding library
Loading...
Searching...
No Matches
VariableLengthLoadBenchmark.cpp
Go to the documentation of this file.
1/*
2 RawSpeed - RAW file decoder.
3
4 Copyright (C) 2024 Roman Lebedev
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19*/
20
22#include "adt/Array1DRef.h"
23#include "adt/Casts.h"
24#include "adt/Invariant.h"
25#include "bench/Common.h"
26#include "common/Common.h"
27#include <array>
28#include <cstddef>
29#include <cstdint>
30#include <vector>
31#include <benchmark/benchmark.h>
32
33namespace rawspeed {
34
35namespace {
36
// Baseline load used by the benchmark: fills `out` from `in`, starting at byte
// offset `inPos`, under the precondition that the whole read is in-bounds.
//
// Preconditions, each checked with invariant():
//  - neither `out` nor `in` is empty, and `out` is no larger than `in`;
//  - `inPos` is non-negative and lies inside `in`;
//  - the `out.size()` bytes starting at `inPos` end at or before `in.size()`.
//
// NOTE(review): this rendered listing drops source lines 38 and 49, i.e. the
// second parameter (`Array1DRef<const std::byte> in`, per the signature shown
// in the page's symbol index) and the statement that performs the load itself.
// Presumably that statement is a plain copy of `out.size()` bytes -- confirm
// against the real source file.
37[[maybe_unused]] inline void fixedLengthLoad(Array1DRef<std::byte> out,
39 int inPos) {
// Basic sanity of the two buffers and of the read position.
40 invariant(out.size() != 0);
41 invariant(in.size() != 0);
42 invariant(out.size() <= in.size());
43 invariant(inPos >= 0);
44
45 // Here we "somehow" know that the load is always in-bounds.
// These two invariants are what distinguish this function from the
// variable-length loads: the read never runs past the end of `in`.
46 invariant(inPos < in.size());
47 invariant(inPos + out.size() <= in.size());
48
50}
51
// Dispatching wrapper around fixedLengthLoad().
//
// When the read of `out.size()` bytes at `inPos` fits entirely inside `in`, the
// load is done by fixedLengthLoad(); otherwise the same three arguments are
// forwarded to `Callable`.
//
// `Callable` is a non-type template parameter declared with the type of
// fixedLengthLoad, so any function with that signature can be plugged in as
// the fallback.
//
// NOTE(review): this rendered listing drops source lines 54-55 (the function
// name and its parameter list). Per the page's symbol index the signature is
// `void fixedLengthLoadOr(Array1DRef<std::byte> out,
//                         Array1DRef<const std::byte> in, int inPos)`.
52template <decltype(fixedLengthLoad) Callable>
53[[maybe_unused]] inline void
// Same buffer/position sanity checks as fixedLengthLoad(), but without the
// in-bounds requirement: that is decided at run time below.
56 invariant(out.size() != 0);
57 invariant(in.size() != 0);
58 invariant(out.size() <= in.size());
59 invariant(inPos >= 0);
60
// Fast path: the whole read is inside `in`.
61 if (inPos + out.size() <= in.size()) {
62 fixedLengthLoad(out, in, inPos);
63 return;
64 }
65
// Otherwise the read would run past the end of `in`; let the fallback
// implementation deal with it.
66 Callable(out, in, inPos);
67}
68
69} // namespace
70
71} // namespace rawspeed
72
73namespace {
74
75using rawspeed::fixedLengthLoad;
76using rawspeed::fixedLengthLoadOr;
80
// Benchmark body: repeatedly runs the load implementation `Impl` over an input
// buffer, loading `sizeof(T)` bytes at a time.
//
// Template parameters:
//  - `Impl`: the load function under test; it must have the same type as
//    variableLengthLoadNaiveViaMemcpy.
//  - `T`: only its size is used, as the number of bytes loaded per call.
//
// `state.range(0)` supplies the requested input size in bytes.
//
// NOTE(review): this rendered listing drops source lines 89, 91 and 95 (the
// `inStorage` initializer and the declarations of the `in` and `out` views), so
// how the input buffer is filled cannot be seen here.
81template <decltype(variableLengthLoadNaiveViaMemcpy) Impl, typename T>
82void BM_Impl(benchmark::State& state) {
83 constexpr int bytesPerItem = sizeof(T);
84
// Requested size rounded up to a whole number of items; passed through
// DoNotOptimize so the compiler cannot treat it as a known constant.
85 int64_t numBytes = rawspeed::roundUp(state.range(0), bytesPerItem);
86 benchmark::DoNotOptimize(numBytes);
87
// Backing storage for the input and the view over it (declarations partly
// missing from the listing, see the note above).
88 const std::vector<std::byte> inStorage(
90
92 inStorage.data(), rawspeed::implicit_cast<int>(numBytes));
93
// Destination for a single item; deliberately left uninitialized since every
// call to `Impl` is expected to overwrite it -- TODO confirm.
94 std::array<std::byte, bytesPerItem> outStorage;
96 outStorage.data(), rawspeed::implicit_cast<int>(bytesPerItem));
97
// Timed region: one pass over the whole input per benchmark iteration, one
// `Impl` call per item. DoNotOptimize keeps each load from being elided.
98 for (auto _ : state) {
99 for (int inPos = 0; inPos < numBytes; inPos += bytesPerItem) {
100 Impl(out, in, inPos);
101 benchmark::DoNotOptimize(out.begin());
102 }
103 }
104
// Report the per-iteration byte count both as a rate ("Throughput", 1024-based
// units) and as its inverse ("Latency", 1000-based units).
105 state.SetComplexityN(numBytes);
106 state.counters.insert({
107 {"Throughput",
108 benchmark::Counter(sizeof(std::byte) * state.complexity_length_n(),
109 benchmark::Counter::Flags::kIsIterationInvariantRate,
110 benchmark::Counter::kIs1024)},
111 {"Latency",
112 benchmark::Counter(sizeof(std::byte) * state.complexity_length_n(),
113 benchmark::Counter::Flags::kIsIterationInvariantRate |
114 benchmark::Counter::Flags::kInvert,
115 benchmark::Counter::kIs1000)},
116 });
117}
118
119void CustomArguments(benchmark::internal::Benchmark* b) {
120 b->Unit(benchmark::kMicrosecond);
121
122 static constexpr int L1dByteSize = 32U * (1U << 10U);
123 static constexpr int L2dByteSize = 512U * (1U << 10U);
124 static constexpr int MaxBytesOptimal = L2dByteSize;
125
126 if (benchmarkDryRun()) {
127 b->Arg(L1dByteSize);
128 return;
129 }
130
131 b->RangeMultiplier(2);
132 if constexpr ((true))
133 b->Arg(MaxBytesOptimal);
134 else
135 b->Range(1, 2048UL << 20)->Complexity(benchmark::oN);
136}
137
// Benchmark registration. The macros below expand to one BENCHMARK(...) per
// (load implementation, item type) pair.
138// NOLINTBEGIN(cppcoreguidelines-macro-usage)
139
// GEN(I, T): registers BM_Impl<I, T> and configures it via CustomArguments.
140#define GEN(I, T) BENCHMARK(BM_Impl<I, T>)->Apply(CustomArguments)
141
// GEN_CALLABLE(I): registers implementation I for 1-, 2-, 4- and 8-byte items.
142#define GEN_CALLABLE(I) \
143 GEN(I, uint8_t); \
144 GEN(I, uint16_t); \
145 GEN(I, uint32_t); \
146 GEN(I, uint64_t)
147
// GEN_TIME(): registers all four load implementations, each passed through
// GEN_WRAPPER. GEN_WRAPPER is looked up when GEN_TIME() is expanded, so it must
// be (re)defined before each use.
148#define GEN_TIME() \
149 GEN_CALLABLE(GEN_WRAPPER(fixedLengthLoad)); \
150 GEN_CALLABLE(GEN_WRAPPER(variableLengthLoad)); \
151 GEN_CALLABLE(GEN_WRAPPER(variableLengthLoadNaiveViaConditionalLoad)); \
152 GEN_CALLABLE(GEN_WRAPPER(variableLengthLoadNaiveViaMemcpy))
153
// First variant: benchmark each implementation directly (identity wrapper).
// NOTE(review): this rendered listing drops source line 157, which presumably
// holds the `GEN_TIME();` expansion for this variant -- confirm.
154#undef GEN_WRAPPER
155#define GEN_WRAPPER(I) I
156
158
// Second variant: benchmark each implementation as the fallback of
// fixedLengthLoadOr, i.e. only reached when a read is not fully in-bounds.
159#undef GEN_WRAPPER
160#define GEN_WRAPPER(I) fixedLengthLoadOr<I>
161
162GEN_TIME();
163
164// NOLINTEND(cppcoreguidelines-macro-usage)
165
166} // namespace
167
#define invariant(expr)
Definition Invariant.h:27
BENCHMARK_MAIN()
#define GEN_TIME()
bool RAWSPEED_READNONE benchmarkDryRun()
int RAWSPEED_READONLY size() const
void fixedLengthLoad(Array1DRef< std::byte > out, Array1DRef< const std::byte > in, int inPos)
void fixedLengthLoadOr(rawspeed::Array1DRef< std::byte > out, rawspeed::Array1DRef< const std::byte > in, int inPos)
constexpr uint64_t RAWSPEED_READNONE roundUp(uint64_t value, uint64_t multiple)
Definition Common.h:134
constexpr RAWSPEED_READNONE Ttgt implicit_cast(Tsrc value)
Definition Casts.h:32
void variableLengthLoadNaiveViaConditionalLoad(Array1DRef< std::byte > out, Array1DRef< const std::byte > in, int inPos)
void variableLengthLoad(const Array1DRef< std::byte > out, Array1DRef< const std::byte > in, int inPos)
void variableLengthLoadNaiveViaMemcpy(Array1DRef< std::byte > out, Array1DRef< const std::byte > in, int inPos)