RawSpeed
fast raw decoding library
Loading...
Searching...
No Matches
RawImageDataU16.cpp
Go to the documentation of this file.
1/*
2 RawSpeed - RAW file decoder.
3
4 Copyright (C) 2009-2014 Klaus Post
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19*/
20
21#include "rawspeedconfig.h"
22#include "common/RawImage.h"
23#include "adt/Array1DRef.h"
24#include "adt/Array2DRef.h"
25#include "adt/Bit.h"
26#include "adt/Casts.h"
28#include "adt/Point.h"
29#include "common/Common.h"
30#include "common/TableLookUp.h"
32#include "metadata/BlackArea.h"
33#include <algorithm>
34#include <array>
35#include <cassert>
36#include <cstdint>
37#include <memory>
38#include <vector>
39
40#ifdef WITH_SSE2
41#include "common/CpuFeatures.h"
42#include <emmintrin.h>
43#endif
44
45using std::array;
46using std::max;
47using std::min;
48using std::vector;
49
50namespace rawspeed {
51
56
59
62
63 std::vector<uint16_t> histogramStorage;
64 auto histogram = Array2DRef<uint16_t>::create(histogramStorage, 65536, 4);
65
66 for (int row = 0; row != histogram.height(); ++row) {
67 for (int col = 0; col != histogram.width(); ++col) {
68 histogram(row, col) = 0;
69 }
70 }
71
72 int totalpixels = 0;
73
74 for (auto area : blackAreas) {
75 /* Make sure area sizes are multiple of two,
76 so we have the same amount of pixels for each CFA group */
77 area.size = area.size - (area.size & 1);
78
79 /* Process horizontal area */
80 if (!area.isVertical) {
81 if (static_cast<int>(area.offset) + static_cast<int>(area.size) >
83 ThrowRDE("Offset + size is larger than height of image");
84 for (uint32_t y = area.offset; y < area.offset + area.size; y++) {
85 for (int x = mOffset.x; x < dim.x + mOffset.x; x++) {
86 // FIXME: this only samples a single row, not an area.
87 const auto localhist = histogram[(2 * (y & 1)) + (x & 1)];
88 const auto hBin = img(y, mOffset.x);
89 localhist(hBin)++;
90 }
91 }
92 totalpixels += area.size * dim.x;
93 }
94
95 /* Process vertical area */
96 if (area.isVertical) {
97 if (static_cast<int>(area.offset) + static_cast<int>(area.size) >
99 ThrowRDE("Offset + size is larger than width of image");
100 for (int y = mOffset.y; y < dim.y + mOffset.y; y++) {
101 for (uint32_t x = area.offset; x < area.size + area.offset; x++) {
102 // FIXME: this only samples a single row, not an area.
103 const auto localhist = histogram[(2 * (y & 1)) + (x & 1)];
104 const auto hBin = img(y, area.offset);
105 localhist(hBin)++;
106 }
107 }
108 totalpixels += area.size * dim.y;
109 }
110 }
111
113 auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();
114
115 if (!totalpixels) {
116 for (int& i : blackLevelSeparate1D)
117 i = blackLevel;
118 return;
119 }
120
121 /* Calculate median value of black areas for each component */
122 /* Adjust the number of total pixels so it is the same as the median of each
123 * histogram */
124 totalpixels /= 4 * 2;
125
126 for (int i = 0; i < 4; i++) {
127 const auto localhist = histogram[i];
128 int acc_pixels = localhist(0);
129 int pixel_value = 0;
130 while (acc_pixels <= totalpixels && pixel_value < 65535) {
131 pixel_value++;
132 acc_pixels += localhist(pixel_value);
133 }
134 blackLevelSeparate1D(i) = pixel_value;
135 }
136
137 /* If this is not a CFA image, we do not use separate blacklevels, use average
138 */
139 if (!isCFA) {
140 int total = 0;
141 for (int i : blackLevelSeparate1D)
142 total += i;
143 for (int& i : blackLevelSeparate1D)
144 i = (total + 2) >> 2;
145 }
146}
147
149 const int skipBorder = 250;
150 int gw = (dim.x - skipBorder) * cpp;
151 if ((blackAreas.empty() && !blackLevelSeparate && blackLevel < 0) ||
152 !whitePoint) { // Estimate
153 int b = 65536;
154 int m = 0;
156 for (int row = skipBorder; row < (dim.y - skipBorder); row++) {
157 for (int col = skipBorder; col < gw; col++) {
158 uint16_t pixel = img(row, skipBorder + col);
159 b = min(static_cast<int>(pixel), b);
160 m = max(static_cast<int>(pixel), m);
161 }
162 }
163 if (blackLevel < 0)
164 blackLevel = b;
165 if (!whitePoint)
166 whitePoint = m;
168 "ISO:%d, Estimated black:%d, Estimated white: %d",
169 metadata.isoSpeed, blackLevel, *whitePoint);
170 }
171
172 /* Skip, if not needed */
173 if ((blackAreas.empty() && blackLevel == 0 && whitePoint == 65535 &&
175 dim.area() <= 0)
176 return;
177
178 /* If filter has not set separate blacklevel, compute or fetch it */
181
183}
184
185void RawImageDataU16::scaleValues(int start_y, int end_y) {
186#ifndef WITH_SSE2
187
188 return scaleValues_plain(start_y, end_y);
189
190#else
191
192 int depth_values = *whitePoint - (*blackLevelSeparate)(0, 0);
193 float app_scale = 65535.0F / implicit_cast<float>(depth_values);
194
195 // Check SSE2
196 if (Cpuid::SSE2() && app_scale < 63) {
197 scaleValues_SSE2(start_y, end_y);
198 } else {
199 scaleValues_plain(start_y, end_y);
200 }
201
202#endif
203}
204
205#ifdef WITH_SSE2
206void RawImageDataU16::scaleValues_SSE2(int start_y, int end_y) {
207 assert(blackLevelSeparate->width() == 2 && blackLevelSeparate->height() == 2);
208 auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();
209
210 int depth_values = *whitePoint - blackLevelSeparate1D(0);
211 float app_scale = 65535.0F / implicit_cast<float>(depth_values);
212
213 // Scale in 30.2 fp
214 auto full_scale_fp = static_cast<int>(app_scale * 4.0F);
215 // Half Scale in 18.14 fp
216 auto half_scale_fp = static_cast<int>(app_scale * 4095.0F);
217
218 __m128i sseround;
219 __m128i ssesub2;
220 __m128i ssesign;
221 __m128i rand_mul;
222 __m128i rand_mask;
223 __m128i sse_full_scale_fp;
224 __m128i sse_half_scale_fp;
225
226 std::array<uint32_t, 4 * 4> sub_mul;
227
228 // 10 bit fraction
229 uint32_t mul = static_cast<int>(
230 1024.0F * 65535.0F /
231 static_cast<float>(*whitePoint - blackLevelSeparate1D(mOffset.x & 1)));
232 mul |= (static_cast<int>(
233 1024.0F * 65535.0F /
234 static_cast<float>(*whitePoint -
235 blackLevelSeparate1D((mOffset.x + 1) & 1))))
236 << 16;
237 uint32_t b = blackLevelSeparate1D(mOffset.x & 1) |
238 (blackLevelSeparate1D((mOffset.x + 1) & 1) << 16);
239
240 for (int i = 0; i < 4; i++) {
241 sub_mul[i] = b; // Subtract even lines
242 sub_mul[4 + i] = mul; // Multiply even lines
243 }
244
245 mul = static_cast<int>(
246 1024.0F * 65535.0F /
247 static_cast<float>(*whitePoint -
248 blackLevelSeparate1D(2 + (mOffset.x & 1))));
249 mul |= (static_cast<int>(
250 1024.0F * 65535.0F /
251 static_cast<float>(*whitePoint - blackLevelSeparate1D(
252 2 + ((mOffset.x + 1) & 1)))))
253 << 16;
254 b = blackLevelSeparate1D(2 + (mOffset.x & 1)) |
255 (blackLevelSeparate1D(2 + ((mOffset.x + 1) & 1)) << 16);
256
257 for (int i = 0; i < 4; i++) {
258 sub_mul[8 + i] = b; // Subtract odd lines
259 sub_mul[12 + i] = mul; // Multiply odd lines
260 }
261
262 sseround = _mm_set_epi32(512, 512, 512, 512);
263 ssesub2 = _mm_set_epi32(32768, 32768, 32768, 32768);
264 ssesign = _mm_set_epi32(0x80008000, 0x80008000, 0x80008000, 0x80008000);
265 sse_full_scale_fp = _mm_set1_epi32(full_scale_fp | (full_scale_fp << 16));
266 sse_half_scale_fp = _mm_set1_epi32(half_scale_fp >> 4);
267
268 if (mDitherScale) {
269 rand_mul = _mm_set1_epi32(0x4d9f1d32);
270 } else {
271 rand_mul = _mm_set1_epi32(0);
272 }
273 rand_mask = _mm_set1_epi32(0x00ff00ff); // 8 random bits
274
275 Array2DRef<uint16_t> out(getU16DataAsUncroppedArray2DRef());
276
277 for (int y = start_y; y < end_y; y++) {
278 __m128i sserandom;
279 if (mDitherScale) {
280 sserandom = _mm_set_epi32(
281 (dim.x * 1676) + (y * 18000), (dim.x * 2342) + (y * 34311),
282 (dim.x * 4272) + (y * 12123), (dim.x * 1234) + (y * 23464));
283 } else {
284 sserandom = _mm_setzero_si128();
285 }
286 __m128i ssescale;
287 __m128i ssesub;
288 if (((y + mOffset.y) & 1) == 0) {
289 ssesub = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[0]));
290 ssescale = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[4]));
291 } else {
292 ssesub = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[8]));
293 ssescale = _mm_loadu_si128(reinterpret_cast<__m128i*>(&sub_mul[12]));
294 }
295
296 for (int x = 0; x < static_cast<int>(roundDown(uncropped_dim.x, 8));
297 x += 8) {
298 __m128i pix_high;
299 __m128i temp;
300 __m128i pix_low =
301 _mm_load_si128(reinterpret_cast<__m128i*>(&out(mOffset.y + y, x)));
302 // Subtract black
303 pix_low = _mm_subs_epu16(pix_low, ssesub);
304 // Multiply the two unsigned shorts and combine it to 32 bit result
305 pix_high = _mm_mulhi_epu16(pix_low, ssescale);
306 temp = _mm_mullo_epi16(pix_low, ssescale);
307 pix_low = _mm_unpacklo_epi16(temp, pix_high);
308 pix_high = _mm_unpackhi_epi16(temp, pix_high);
309 // Add rounder
310 pix_low = _mm_add_epi32(pix_low, sseround);
311 pix_high = _mm_add_epi32(pix_high, sseround);
312
313 sserandom = _mm_xor_si128(_mm_mulhi_epi16(sserandom, rand_mul),
314 _mm_mullo_epi16(sserandom, rand_mul));
315 __m128i rand_masked =
316 _mm_and_si128(sserandom, rand_mask); // Get 8 random bits
317 rand_masked = _mm_mullo_epi16(rand_masked, sse_full_scale_fp);
318
319 __m128i zero = _mm_setzero_si128();
320 __m128i rand_lo = _mm_sub_epi32(sse_half_scale_fp,
321 _mm_unpacklo_epi16(rand_masked, zero));
322 __m128i rand_hi = _mm_sub_epi32(sse_half_scale_fp,
323 _mm_unpackhi_epi16(rand_masked, zero));
324
325 pix_low = _mm_add_epi32(pix_low, rand_lo);
326 pix_high = _mm_add_epi32(pix_high, rand_hi);
327
328 // Shift down
329 pix_low = _mm_srai_epi32(pix_low, 10);
330 pix_high = _mm_srai_epi32(pix_high, 10);
331 // Subtract to avoid clipping
332 pix_low = _mm_sub_epi32(pix_low, ssesub2);
333 pix_high = _mm_sub_epi32(pix_high, ssesub2);
334 // Pack
335 pix_low = _mm_packs_epi32(pix_low, pix_high);
336 // Shift sign off
337 pix_low = _mm_xor_si128(pix_low, ssesign);
338 _mm_store_si128(reinterpret_cast<__m128i*>(&out(mOffset.y + y, x)),
339 pix_low);
340 }
341 }
342}
343#endif
344
345void RawImageDataU16::scaleValues_plain(int start_y, int end_y) {
347
348 assert(blackLevelSeparate->width() == 2 && blackLevelSeparate->height() == 2);
349 auto blackLevelSeparate1D = *blackLevelSeparate->getAsArray1DRef();
350
351 int depth_values = *whitePoint - blackLevelSeparate1D(0);
352 float app_scale = 65535.0F / implicit_cast<float>(depth_values);
353
354 // Scale in 30.2 fp
355 auto full_scale_fp = static_cast<int>(app_scale * 4.0F);
356 // Half Scale in 18.14 fp
357 auto half_scale_fp = static_cast<int>(app_scale * 4095.0F);
358
359 // Not SSE2
360 int gw = dim.x * cpp;
361 std::array<int, 4> mul;
362 std::array<int, 4> sub;
363 for (int i = 0; i < 4; i++) {
364 int v = i;
365 if ((mOffset.x & 1) != 0)
366 v ^= 1;
367 if ((mOffset.y & 1) != 0)
368 v ^= 2;
369 mul[i] = static_cast<int>(
370 16384.0F * 65535.0F /
371 static_cast<float>(*whitePoint - blackLevelSeparate1D(v)));
372 sub[i] = blackLevelSeparate1D(v);
373 }
374 for (int y = start_y; y < end_y; y++) {
375 int v = dim.x + (y * 36969);
376 for (int x = 0; x < gw; x++) {
377 int rand;
378 if (mDitherScale) {
379 v = 18000 * (v & 65535) + (v >> 16);
380 rand = half_scale_fp - (full_scale_fp * (v & 2047));
381 } else {
382 rand = 0;
383 }
384 uint16_t& pixel = img(y, x);
385 pixel = clampBits(((pixel - sub[(2 * (y & 1)) + (x & 1)]) *
386 mul[(2 * (y & 1)) + (x & 1)] +
387 8192 + rand) >>
388 14,
389 16);
390 }
391 }
392}
393
394/* This performs a 4 way interpolated pixel */
395/* The value is interpolated from the 4 closest valid pixels in */
396/* the horizontal and vertical direction. Pixels found further away */
397/* are weighed less */
398
401
402 array<int, 4> values;
403 array<int, 4> dist;
404 array<int, 4> weight;
405
406 values.fill(-1);
407 dist.fill(0);
408 weight.fill(0);
409
410 const auto bad =
412 int step = isCFA ? 2 : 1;
413
414 // Find pixel to the left
415 int x_find = static_cast<int>(x) - step;
416 int curr = 0;
417 while (x_find >= 0 && values[curr] < 0) {
418 if (0 == ((bad(y, x_find >> 3) >> (x_find & 7)) & 1)) {
419 values[curr] = img(y, x_find + component);
420 dist[curr] = static_cast<int>(x) - x_find;
421 }
422 x_find -= step;
423 }
424 // Find pixel to the right
425 x_find = static_cast<int>(x) + step;
426 curr = 1;
427 while (x_find < uncropped_dim.x && values[curr] < 0) {
428 if (0 == ((bad(y, x_find >> 3) >> (x_find & 7)) & 1)) {
429 values[curr] = img(y, x_find + component);
430 dist[curr] = x_find - static_cast<int>(x);
431 }
432 x_find += step;
433 }
434
435 // Find pixel upwards
436 int y_find = static_cast<int>(y) - step;
437 curr = 2;
438 while (y_find >= 0 && values[curr] < 0) {
439 if (0 == ((bad(y_find, x >> 3) >> (x & 7)) & 1)) {
440 values[curr] = img(y_find, x + component);
441 dist[curr] = static_cast<int>(y) - y_find;
442 }
443 y_find -= step;
444 }
445 // Find pixel downwards
446 y_find = static_cast<int>(y) + step;
447 curr = 3;
448 while (y_find < uncropped_dim.y && values[curr] < 0) {
449 if (0 == ((bad(y_find, x >> 3) >> (x & 7)) & 1)) {
450 values[curr] = img(y_find, x + component);
451 dist[curr] = y_find - static_cast<int>(y);
452 }
453 y_find += step;
454 }
455
456 // Find x weights
457 int total_dist_x = dist[0] + dist[1];
458
459 int total_shifts = 7;
460 if (total_dist_x) {
461 weight[0] = dist[0] ? (total_dist_x - dist[0]) * 256 / total_dist_x : 0;
462 weight[1] = 256 - weight[0];
463 total_shifts++;
464 }
465
466 // Find y weights
467 if (int total_dist_y = dist[2] + dist[3]; total_dist_y) {
468 weight[2] = dist[2] ? (total_dist_y - dist[2]) * 256 / total_dist_y : 0;
469 weight[3] = 256 - weight[2];
470 total_shifts++;
471 }
472
473 int total_pixel = 0;
474 for (int i = 0; i < 4; i++)
475 if (values[i] >= 0)
476 total_pixel += values[i] * weight[i];
477
478 total_pixel >>= total_shifts;
479 img(y, x + component) = clampBits(total_pixel, 16);
480
481 /* Process other pixels - could be done inline, since we have the weights */
482 if (cpp > 1 && component == 0)
483 for (int i = 1; i < cpp; i++)
484 fixBadPixel(x, y, i);
485}
486
487// TODO: Could be done with SSE2
488void RawImageDataU16::doLookup(int start_y, int end_y) {
490
491 if (table->ntables == 1) {
492 if (table->dither) {
493 int gw = uncropped_dim.x * cpp;
494 const auto t = table->getTable(0);
495 for (int y = start_y; y < end_y; y++) {
496 uint32_t v = (uncropped_dim.x + y * 13) ^ 0x45694584;
497 for (int x = 0; x < gw; x++) {
498 uint16_t p = img(y, x);
499 uint32_t base = t((2 * p) + 0);
500 uint32_t delta = t((2 * p) + 1);
501 v = 15700 * (v & 65535) + (v >> 16);
502 uint32_t pix = base + ((delta * (v & 2047) + 1024) >> 12);
503 img(y, x) = clampBits(pix, 16);
504 }
505 }
506 return;
507 }
508
509 int gw = uncropped_dim.x * cpp;
510 const auto t = table->getTable(0);
511 for (int y = start_y; y < end_y; y++) {
512 for (int x = 0; x < gw; x++) {
513 img(y, x) = t(img(y, x));
514 }
515 }
516 return;
517 }
518 ThrowRDE("Table lookup with multiple components not implemented");
519}
520
521} // namespace rawspeed
#define ThrowRDE(...)
assert(dim.area() >=area)
dim y
Definition Common.cpp:51
dim x
Definition Common.cpp:50
static Array2DRef< T > create(std::vector< cvless_value_type, AllocatorType > &storage, int width, int height)
Definition Array2DRef.h:98
static bool RAWSPEED_READNONE SSE2()
std::array< int, 4 > blackLevelSeparateStorage
Definition RawImage.h:164
Optional< Array2DRef< int > > blackLevelSeparate
Definition RawImage.h:165
Optional< int > whitePoint
Definition RawImage.h:172
std::vector< BlackArea > blackAreas
Definition RawImage.h:174
Array2DRef< uint16_t > getU16DataAsUncroppedArray2DRef() noexcept
Definition RawImage.h:290
std::vector< uint8_t, AlignedAllocator< uint8_t, 16 > > mBadPixelMap
Definition RawImage.h:180
void startWorker(RawImageWorker::RawImageWorkerTask task, bool cropped)
Definition RawImage.cpp:271
CroppedArray2DRef< uint16_t > getU16DataAsCroppedArray2DRef() noexcept
Definition RawImage.h:304
ImageMetaData metadata
Definition RawImage.h:184
std::unique_ptr< TableLookUp > table
Definition RawImage.h:206
uint32_t mBadPixelMapPitch
Definition RawImage.h:181
RawImageType dataType
Definition RawImage.h:190
void scaleValues_plain(int start_y, int end_y)
void calculateBlackAreas() override
void scaleValues(int start_y, int end_y) override
void doLookup(int start_y, int end_y) override
void fixBadPixel(uint32_t x, uint32_t y, int component=0) override
value_type x
Definition Point.h:102
constexpr RAWSPEED_READNONE Ttgt implicit_cast(Tsrc value)
Definition Casts.h:32
constexpr uint64_t RAWSPEED_READNONE roundDown(uint64_t value, uint64_t multiple)
Definition Common.h:129
void writeLog(DEBUG_PRIO priority, const char *format,...)
Definition Common.cpp:37
Array2DRef(Array1DRef< T > data, int width, int height, int pitch) -> Array2DRef< T >
constexpr auto RAWSPEED_READNONE clampBits(T value, unsigned int nBits)
Definition Bit.h:75