// Layout constants for one MCU in this code path: 4 input components are
// expanded into 2 output pixels of 3 components each (6 output components).
// NOTE(review): the enclosing function header is not visible in this excerpt.
100 constexpr int InputComponentsPerMCU = 4;
101 constexpr int PixelsPerMCU = 2;
// YsPerMCU is defined equal to the pixel count; it is used below as the
// offset of the two trailing components inside an input MCU.
102 constexpr int YsPerMCU = PixelsPerMCU;
103 constexpr int ComponentsPerPixel = 3;
104 constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU;
// Number of MCUs per input row: row width divided by the per-MCU component
// count.
107 int numMCUs =
input.width() / InputComponentsPerMCU;
// An MCU held in memory as one YCbCr value per pixel.
110 using MCUTy = std::array<YCbCr, PixelsPerMCU>;
// Helper lambda taking an MCU index. It captures `input` by copy (as input_)
// and `row` by value, and reads from input_[row].
// NOTE(review): several lines of the body (including whatever it returns) are
// not visible in this excerpt.
112 auto LoadMCU = [input_ =
input, row](
int MCUIdx) {
// Per pixel: a crop at offset InputComponentsPerMCU * MCUIdx + YIdx with a
// second argument of 1 (presumably one Y sample per pixel — confirm).
114 for (
int YIdx = 0; YIdx < PixelsPerMCU; ++YIdx)
116 (InputComponentsPerMCU * MCUIdx) + YIdx, 1));
// A crop with a second argument of 2 that starts YsPerMCU components into the
// MCU (presumably the Cb/Cr pair stored after the Y samples — confirm).
119 input_[row].getCrop((InputComponentsPerMCU * MCUIdx) + YsPerMCU, 2));
// Helper lambda taking an MCU and its index. It captures `this`, `out` and
// `row`, and addresses out[row] (presumably to write the MCU's pixels — the
// rest of the body is not visible in this excerpt).
122 auto StoreMCU = [
this, out, row](
const MCUTy& MCU,
int MCUIdx) {
// Pixel number `Pixel` of MCU number `MCUIdx` is addressed at component offset
// OutputComponentsPerMCU * MCUIdx + ComponentsPerPixel * Pixel.
123 for (
int Pixel = 0; Pixel < PixelsPerMCU; ++Pixel) {
125 out[row].getCrop((OutputComponentsPerMCU * MCUIdx) +
126 (ComponentsPerPixel * Pixel),
// Loop over every MCU except the last one: each iteration also loads
// MCU number MCUIdx + 1, so the final index must be excluded here.
145 for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) {
// MCUs[0] is the current MCU, MCUs[1] is the next one in the row.
150 std::array<MCUTy, 2> MCUs;
151 for (
int SubMCUIdx = 0;
static_cast<unsigned>(SubMCUIdx) < MCUs.size();
153 MCUs[SubMCUIdx] = LoadMCU(MCUIdx + SubMCUIdx);
// process(hue) is called only on pixel 0 of each of the two MCUs.
156 MCUs[0][0].process(
hue);
158 MCUs[1][0].process(
hue);
// Pixel 1 of the current MCU gets its Cb/Cr from interpolateCbCr(), fed with
// pixel 0 of the current MCU and pixel 0 of the next MCU.
160 MCUs[0][1].interpolateCbCr(MCUs[0][0], MCUs[1][0]);
// Only the current MCU is stored; the next one is reloaded on the following
// iteration.
163 StoreMCU(MCUs[0], MCUIdx);
// Separate path that loads and stores a single MCU without a neighbour
// (presumably the last MCU of the row — the lines in between are not visible
// in this excerpt).
175 MCUTy MCU = LoadMCU(MCUIdx);
180 StoreMCU(MCU, MCUIdx);
// Layout constants for a 2x2-pixel MCU.
// NOTE(review): X_S_F / Y_S_F presumably stand for the horizontal / vertical
// subsampling factors — confirm against the enclosing function, whose header
// is not visible in this excerpt.
197 constexpr int X_S_F = 2;
198 constexpr int Y_S_F = 2;
199 constexpr int PixelsPerMCU = X_S_F * Y_S_F;
// Input MCU size: one component per pixel plus 2 more (6 in total).
200 constexpr int InputComponentsPerMCU = 2 + PixelsPerMCU;
// YsPerMCU equals the pixel count; it is used below as the offset of the two
// trailing components inside an input MCU.
202 constexpr int YsPerMCU = PixelsPerMCU;
203 constexpr int ComponentsPerPixel = 3;
204 constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU;
// Number of MCUs per input row: row width divided by the per-MCU component
// count.
207 int numMCUs =
input.width() / InputComponentsPerMCU;
// An MCU held in memory as Y_S_F rows of X_S_F YCbCr values, indexed
// [MCURow][MCUCol].
210 using MCUTy = std::array<std::array<YCbCr, X_S_F>, Y_S_F>;
// Helper lambda taking an input row index and an MCU index. It captures
// `input` by copy (as input_) and reads from input_[Row].
// NOTE(review): several lines of the body (including whatever it returns) are
// not visible in this excerpt.
212 auto LoadMCU = [input_ =
input](
int Row,
int MCUIdx)
// Per pixel of the 2x2 MCU: a crop at offset
// InputComponentsPerMCU * MCUIdx + X_S_F * MCURow + MCUCol, i.e. the
// per-pixel components are laid out row-major at the start of the MCU.
215 for (
int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
216 for (
int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
218 input_[Row].getCrop((InputComponentsPerMCU * MCUIdx) +
219 (X_S_F * MCURow) + MCUCol,
// A crop with a second argument of 2 that starts YsPerMCU components into the
// MCU (presumably the Cb/Cr pair stored after the Y samples — confirm).
225 input_[Row].getCrop((InputComponentsPerMCU * MCUIdx) + YsPerMCU, 2));
// Helper lambda taking an MCU, its index and the input row index. It captures
// `this` and `out` (presumably to write the MCU's pixels — the rest of the
// body is not visible in this excerpt).
228 auto StoreMCU = [
this, out ](
const MCUTy& MCU,
int MCUIdx,
int Row)
230 for (
int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
231 for (
int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
// One input row maps to two output rows: MCU row MCURow goes to output row
// 2 * Row + MCURow. Within an output row the MCU starts at
// (OutputComponentsPerMCU * MCUIdx) / Y_S_F, because only X_S_F of the MCU's
// pixels land on each output row.
233 out[(2 * Row) + MCURow].getCrop(
234 ((OutputComponentsPerMCU * MCUIdx) / Y_S_F) +
235 (ComponentsPerPixel * MCUCol),
// Loop over every MCU except the last one: each iteration also loads
// MCU number MCUIdx + 1, so the final index must be excluded here.
280 for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) {
// 2x2 neighbourhood of MCUs, indexed [Row][Col]: rows `row` and `row + 1`,
// MCU columns `MCUIdx` and `MCUIdx + 1`. MCUs[0][0] is the current MCU.
286 std::array<std::array<MCUTy, 2>, 2> MCUs;
287 for (
int Row = 0; Row < 2; ++Row)
288 for (
int Col = 0; Col < 2; ++Col)
289 MCUs[Row][Col] = LoadMCU(row + Row, MCUIdx + Col);
// process(hue) is called only on pixel [0][0] of each of the four MCUs.
292 for (
int Row = 0; Row < 2; ++Row)
293 for (
int Col = 0; Col < 2; ++Col)
294 MCUs[Row][Col][0][0].process(
hue);
// Pixel [0][1] of the current MCU: interpolated from pixel [0][0] of the
// current MCU and of the MCU to its right.
297 MCUs[0][0][0][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0]);
// Pixel [1][0] of the current MCU: interpolated from pixel [0][0] of the
// current MCU and of the MCU below it.
300 MCUs[0][0][1][0].interpolateCbCr(MCUs[0][0][0][0], MCUs[1][0][0][0]);
// Pixel [1][1] of the current MCU: interpolated from pixel [0][0] of all four
// MCUs in the neighbourhood.
303 MCUs[0][0][1][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0],
304 MCUs[1][0][0][0], MCUs[1][1][0][0]);
// Only the current MCU is stored.
316 StoreMCU(MCUs[0][0], MCUIdx, row);
// Separate path with only a vertical neighbour: MCUs[0] is the MCU in `row`,
// MCUs[1] the MCU in `row + 1`, both at the same MCUIdx (presumably the last
// MCU column, which has no right-hand neighbour — the lines in between are not
// visible in this excerpt).
334 std::array<MCUTy, 2> MCUs;
335 for (
int Row = 0; Row < 2; ++Row)
336 MCUs[Row] = LoadMCU(row + Row, MCUIdx);
338 for (
int Row = 0; Row < 2; ++Row)
339 MCUs[Row][0][0].process(
hue);
// Pixel [1][0] of the upper MCU: interpolated from pixel [0][0] of the upper
// and of the lower MCU.
341 MCUs[0][1][0].interpolateCbCr(MCUs[0][0][0], MCUs[1][0][0]);
// NOTE(review): the body of this loop is not visible in this excerpt.
343 for (
int Row = 0; Row < 2; ++Row)
346 StoreMCU(MCUs[0], MCUIdx, row);
// Layout constants for a 2x2-pixel MCU.
// NOTE(review): X_S_F / Y_S_F presumably stand for the horizontal / vertical
// subsampling factors — confirm against the enclosing function, whose header
// is not visible in this excerpt.
352 constexpr int X_S_F = 2;
353 constexpr int Y_S_F = 2;
354 constexpr int PixelsPerMCU = X_S_F * Y_S_F;
// Input MCU size: one component per pixel plus 2 more (6 in total).
355 constexpr int InputComponentsPerMCU = 2 + PixelsPerMCU;
// YsPerMCU equals the pixel count; it is used below as the offset of the two
// trailing components inside an input MCU.
357 constexpr int YsPerMCU = PixelsPerMCU;
358 constexpr int ComponentsPerPixel = 3;
359 constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU;
// Number of MCUs per input row: row width divided by the per-MCU component
// count.
362 int numMCUs =
input.width() / InputComponentsPerMCU;
// An MCU held in memory as Y_S_F rows of X_S_F YCbCr values, indexed
// [MCURow][MCUCol].
365 using MCUTy = std::array<std::array<YCbCr, X_S_F>, Y_S_F>;
// Helper lambda taking an input row index and an MCU index. It captures
// `input` by copy (as input_) and reads from input_[Row].
// NOTE(review): several lines of the body (including whatever it returns) are
// not visible in this excerpt.
367 auto LoadMCU = [input_ =
input](
int Row,
int MCUIdx)
// Per pixel of the 2x2 MCU: a crop at offset
// InputComponentsPerMCU * MCUIdx + X_S_F * MCURow + MCUCol, i.e. the
// per-pixel components are laid out row-major at the start of the MCU.
370 for (
int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
371 for (
int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
373 input_[Row].getCrop((InputComponentsPerMCU * MCUIdx) +
374 (X_S_F * MCURow) + MCUCol,
// A crop with a second argument of 2 that starts YsPerMCU components into the
// MCU (presumably the Cb/Cr pair stored after the Y samples — confirm).
380 input_[Row].getCrop((InputComponentsPerMCU * MCUIdx) + YsPerMCU, 2));
// Helper lambda taking an MCU, its index and the input row index. It captures
// `this` and `out` (presumably to write the MCU's pixels — the rest of the
// body is not visible in this excerpt).
383 auto StoreMCU = [
this, out ](
const MCUTy& MCU,
int MCUIdx,
int Row)
385 for (
int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
386 for (
int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
// One input row maps to two output rows: MCU row MCURow goes to output row
// 2 * Row + MCURow. Within an output row the MCU starts at
// (OutputComponentsPerMCU * MCUIdx) / Y_S_F, because only X_S_F of the MCU's
// pixels land on each output row.
388 out[(2 * Row) + MCURow].getCrop(
389 ((OutputComponentsPerMCU * MCUIdx) / Y_S_F) +
390 (ComponentsPerPixel * MCUCol),
// OpenMP `parallel for` over input rows: static schedule, default(none),
// thread count taken from rawspeed_get_number_of_processor_cores(), and `out`
// passed as firstprivate. The loop bound is input.height() - 1, so the last
// input row is not visited by this loop.
// NOTE(review): the remaining pragma clauses and the start of the loop body
// are on lines that are not visible in this excerpt.
398#pragma omp parallel for default(none) schedule(static) \
399 num_threads(rawspeed_get_number_of_processor_cores()) firstprivate(out) \
402 for (row = 0; row <
input.height() - 1; ++row)
// Loop over every MCU except the last one: each iteration also loads
// MCU number MCUIdx + 1, so the final index must be excluded here.
418 for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) {
// A single row of two MCUs, indexed [Row][Col]: the current MCU and the next
// one in the same row. No row below is loaded in this path.
424 std::array<std::array<MCUTy, 2>, 1> MCUs;
425 for (
int Row = 0; Row < 1; ++Row)
426 for (
int Col = 0; Col < 2; ++Col)
427 MCUs[Row][Col] = LoadMCU(row + Row, MCUIdx + Col);
// process(hue) is called only on pixel [0][0] of each of the two MCUs.
430 for (
int Row = 0; Row < 1; ++Row)
431 for (
int Col = 0; Col < 2; ++Col)
432 MCUs[Row][Col][0][0].process(
hue);
// Pixel [0][1] of the current MCU: interpolated from pixel [0][0] of the
// current MCU and of the MCU to its right.
435 MCUs[0][0][0][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0]);
// NOTE(review): the body of this loop is not visible in this excerpt.
439 for (
int Col = 0; Col < 2; ++Col)
// Only the current MCU is stored.
443 StoreMCU(MCUs[0][0], MCUIdx, row);
// Separate path that loads one MCU with no neighbours at all (presumably the
// last MCU of this row — the lines in between are not visible in this
// excerpt). process(hue) is called only on pixel [0][0].
458 MCUTy MCU = LoadMCU(row, MCUIdx);
460 MCU[0][0].process(
hue);
// NOTE(review): the body of this nested loop is not visible in this excerpt.
463 for (
int Row = 0; Row < 2; ++Row)
464 for (
int Col = 0; Col < 2; ++Col)
467 StoreMCU(MCU, MCUIdx, row);