137template <
typename T,
typename C>
void BM(benchmark::State& state) {
138 int64_t numBytes = state.range(0);
140 assert(numBytes <= std::numeric_limits<int>::max());
144 benchmark::DoNotOptimize(input.
begin());
147 std::vector<OutputChunkType,
149 std::allocator<OutputChunkType>>>
154 for (
auto _ : state) {
159 std::back_inserter(output)));
161 decltype(bsInserter)>;
165 for (
const auto& len : input) {
166 benchmark::DoNotOptimize(bits);
171 state.SetComplexityN(
sizeof(
typename decltype(output)::value_type) *
173 state.counters.insert({
175 benchmark::Counter(
sizeof(uint8_t) * state.complexity_length_n(),
176 benchmark::Counter::Flags::kIsIterationInvariantRate,
177 benchmark::Counter::kIs1024)},
179 benchmark::Counter(
sizeof(uint8_t) * state.complexity_length_n(),
180 benchmark::Counter::Flags::kIsIterationInvariantRate |
181 benchmark::Counter::Flags::kInvert,
182 benchmark::Counter::kIs1000)},
187 b->Unit(benchmark::kMicrosecond);
188 b->RangeMultiplier(2);
190 static constexpr int L1dByteSize = 32U * (1U << 10U);
191 static constexpr int L2dByteSize = 512U * (1U << 10U);
192 static constexpr int MaxBytesOptimal = L2dByteSize * (1U << 3);
199 if constexpr ((
true)) {
200 b->Arg(MaxBytesOptimal);
202 b->Range(8, MaxBytesOptimal * (1U << 2));
203 b->Complexity(benchmark::oN);