49 static_assert(BPS::value > 0,
"bad bps");
53 int inputPitchBits = BPS::value *
dim.x;
56 assert(inputPitchBits % 8 == 0);
57 int inputPitchBytes = inputPitchBits / 8;
58 assert(inputPitchBits % BPS::value == 0);
59 dim.x = inputPitchBits / BPS::value;
63 const std::vector<uint8_t> buf(packedLength);
68 std::is_same_v<T, float>
73 for (
auto _ : state) {
76 inputPitchBytes, BPS::value, BO);
80 state.SetComplexityN(
dim.area());
81 state.SetItemsProcessed(state.complexity_length_n() * state.iterations());
82 state.SetBytesProcessed(BPS::value * state.items_processed() / 8);
85inline void CustomArgs(benchmark::internal::Benchmark* b) {
86 b->Unit(benchmark::kMicrosecond);
89 static constexpr int L2dByteSize = 512U * (1U << 10U);
90 b->Arg((L2dByteSize / (32 / 8)) / 2);
94 b->RangeMultiplier(2);
95 b->Range(1, 1 * 1024 * 1024)->Complexity(benchmark::oN);