21#include "rawspeedconfig.h"
63 std::vector<uint16_t> histogramStorage;
66 for (
int row = 0; row != histogram.height(); ++row) {
67 for (
int col = 0; col != histogram.width(); ++col) {
68 histogram(row, col) = 0;
77 area.size = area.size - (area.size & 1);
80 if (!area.isVertical) {
81 if (
static_cast<int>(area.offset) +
static_cast<int>(area.size) >
83 ThrowRDE(
"Offset + size is larger than height of image");
84 for (
uint32_t y = area.offset;
y < area.offset + area.size;
y++) {
87 const auto localhist = histogram[(2 * (
y & 1)) + (
x & 1)];
92 totalpixels += area.size *
dim.x;
96 if (area.isVertical) {
97 if (
static_cast<int>(area.offset) +
static_cast<int>(area.size) >
99 ThrowRDE(
"Offset + size is larger than width of image");
101 for (
uint32_t x = area.offset;
x < area.size + area.offset;
x++) {
103 const auto localhist = histogram[(2 * (
y & 1)) + (
x & 1)];
104 const auto hBin = img(
y, area.offset);
108 totalpixels += area.size *
dim.y;
116 for (
int& i : blackLevelSeparate1D)
124 totalpixels /= 4 * 2;
126 for (
int i = 0; i < 4; i++) {
127 const auto localhist = histogram[i];
128 int acc_pixels = localhist(0);
130 while (acc_pixels <= totalpixels && pixel_value < 65535) {
132 acc_pixels += localhist(pixel_value);
134 blackLevelSeparate1D(i) = pixel_value;
141 for (
int i : blackLevelSeparate1D)
143 for (
int& i : blackLevelSeparate1D)
144 i = (total + 2) >> 2;
149 const int skipBorder = 250;
150 int gw = (
dim.x - skipBorder) *
cpp;
156 for (
int row = skipBorder; row < (
dim.y - skipBorder); row++) {
157 for (
int col = skipBorder; col < gw; col++) {
158 uint16_t pixel = img(row, skipBorder + col);
159 b = min(
static_cast<int>(pixel), b);
160 m = max(
static_cast<int>(pixel), m);
168 "ISO:%d, Estimated black:%d, Estimated white: %d",
192 int depth_values = *
whitePoint - (*blackLevelSeparate)(0, 0);
197 scaleValues_SSE2(start_y, end_y);
206void RawImageDataU16::scaleValues_SSE2(
int start_y,
int end_y) {
210 int depth_values = *
whitePoint - blackLevelSeparate1D(0);
214 auto full_scale_fp =
static_cast<int>(app_scale * 4.0F);
216 auto half_scale_fp =
static_cast<int>(app_scale * 4095.0F);
223 __m128i sse_full_scale_fp;
224 __m128i sse_half_scale_fp;
226 std::array<uint32_t, 4 * 4> sub_mul;
232 mul |= (
static_cast<int>(
235 blackLevelSeparate1D((
mOffset.
x + 1) & 1))))
238 (blackLevelSeparate1D((
mOffset.
x + 1) & 1) << 16);
240 for (
int i = 0; i < 4; i++) {
242 sub_mul[4 + i] = mul;
245 mul =
static_cast<int>(
248 blackLevelSeparate1D(2 + (
mOffset.
x & 1))));
249 mul |= (
static_cast<int>(
251 static_cast<float>(*
whitePoint - blackLevelSeparate1D(
254 b = blackLevelSeparate1D(2 + (
mOffset.
x & 1)) |
255 (blackLevelSeparate1D(2 + ((
mOffset.
x + 1) & 1)) << 16);
257 for (
int i = 0; i < 4; i++) {
259 sub_mul[12 + i] = mul;
262 sseround = _mm_set_epi32(512, 512, 512, 512);
263 ssesub2 = _mm_set_epi32(32768, 32768, 32768, 32768);
264 ssesign = _mm_set_epi32(0x80008000, 0x80008000, 0x80008000, 0x80008000);
265 sse_full_scale_fp = _mm_set1_epi32(full_scale_fp | (full_scale_fp << 16));
266 sse_half_scale_fp = _mm_set1_epi32(half_scale_fp >> 4);
269 rand_mul = _mm_set1_epi32(0x4d9f1d32);
271 rand_mul = _mm_set1_epi32(0);
273 rand_mask = _mm_set1_epi32(0x00ff00ff);
277 for (
int y = start_y;
y < end_y;
y++) {
280 sserandom = _mm_set_epi32(
281 (
dim.x * 1676) + (
y * 18000), (
dim.x * 2342) + (
y * 34311),
282 (
dim.x * 4272) + (
y * 12123), (
dim.x * 1234) + (
y * 23464));
284 sserandom = _mm_setzero_si128();
289 ssesub = _mm_loadu_si128(
reinterpret_cast<__m128i*
>(&sub_mul[0]));
290 ssescale = _mm_loadu_si128(
reinterpret_cast<__m128i*
>(&sub_mul[4]));
292 ssesub = _mm_loadu_si128(
reinterpret_cast<__m128i*
>(&sub_mul[8]));
293 ssescale = _mm_loadu_si128(
reinterpret_cast<__m128i*
>(&sub_mul[12]));
301 _mm_load_si128(
reinterpret_cast<__m128i*
>(&out(
mOffset.y +
y,
x)));
303 pix_low = _mm_subs_epu16(pix_low, ssesub);
305 pix_high = _mm_mulhi_epu16(pix_low, ssescale);
306 temp = _mm_mullo_epi16(pix_low, ssescale);
307 pix_low = _mm_unpacklo_epi16(temp, pix_high);
308 pix_high = _mm_unpackhi_epi16(temp, pix_high);
310 pix_low = _mm_add_epi32(pix_low, sseround);
311 pix_high = _mm_add_epi32(pix_high, sseround);
313 sserandom = _mm_xor_si128(_mm_mulhi_epi16(sserandom, rand_mul),
314 _mm_mullo_epi16(sserandom, rand_mul));
315 __m128i rand_masked =
316 _mm_and_si128(sserandom, rand_mask);
317 rand_masked = _mm_mullo_epi16(rand_masked, sse_full_scale_fp);
319 __m128i zero = _mm_setzero_si128();
320 __m128i rand_lo = _mm_sub_epi32(sse_half_scale_fp,
321 _mm_unpacklo_epi16(rand_masked, zero));
322 __m128i rand_hi = _mm_sub_epi32(sse_half_scale_fp,
323 _mm_unpackhi_epi16(rand_masked, zero));
325 pix_low = _mm_add_epi32(pix_low, rand_lo);
326 pix_high = _mm_add_epi32(pix_high, rand_hi);
329 pix_low = _mm_srai_epi32(pix_low, 10);
330 pix_high = _mm_srai_epi32(pix_high, 10);
332 pix_low = _mm_sub_epi32(pix_low, ssesub2);
333 pix_high = _mm_sub_epi32(pix_high, ssesub2);
335 pix_low = _mm_packs_epi32(pix_low, pix_high);
337 pix_low = _mm_xor_si128(pix_low, ssesign);
338 _mm_store_si128(
reinterpret_cast<__m128i*
>(&out(
mOffset.y +
y,
x)),
351 int depth_values = *
whitePoint - blackLevelSeparate1D(0);
355 auto full_scale_fp =
static_cast<int>(app_scale * 4.0F);
357 auto half_scale_fp =
static_cast<int>(app_scale * 4095.0F);
361 std::array<int, 4> mul;
362 std::array<int, 4> sub;
363 for (
int i = 0; i < 4; i++) {
369 mul[i] =
static_cast<int>(
370 16384.0F * 65535.0F /
371 static_cast<float>(*
whitePoint - blackLevelSeparate1D(v)));
372 sub[i] = blackLevelSeparate1D(v);
374 for (
int y = start_y;
y < end_y;
y++) {
375 int v =
dim.x + (
y * 36969);
376 for (
int x = 0;
x < gw;
x++) {
379 v = 18000 * (v & 65535) + (v >> 16);
380 rand = half_scale_fp - (full_scale_fp * (v & 2047));
385 pixel =
clampBits(((pixel - sub[(2 * (
y & 1)) + (
x & 1)]) *
386 mul[(2 * (
y & 1)) + (
x & 1)] +
402 array<int, 4> values;
404 array<int, 4> weight;
412 int step =
isCFA ? 2 : 1;
415 int x_find =
static_cast<int>(
x) - step;
417 while (x_find >= 0 && values[curr] < 0) {
418 if (0 == ((bad(
y, x_find >> 3) >> (x_find & 7)) & 1)) {
419 values[curr] = img(
y, x_find + component);
420 dist[curr] =
static_cast<int>(
x) - x_find;
425 x_find =
static_cast<int>(
x) + step;
428 if (0 == ((bad(
y, x_find >> 3) >> (x_find & 7)) & 1)) {
429 values[curr] = img(
y, x_find + component);
430 dist[curr] = x_find -
static_cast<int>(
x);
436 int y_find =
static_cast<int>(
y) - step;
438 while (y_find >= 0 && values[curr] < 0) {
439 if (0 == ((bad(y_find,
x >> 3) >> (
x & 7)) & 1)) {
440 values[curr] = img(y_find,
x + component);
441 dist[curr] =
static_cast<int>(
y) - y_find;
446 y_find =
static_cast<int>(
y) + step;
449 if (0 == ((bad(y_find,
x >> 3) >> (
x & 7)) & 1)) {
450 values[curr] = img(y_find,
x + component);
451 dist[curr] = y_find -
static_cast<int>(
y);
457 int total_dist_x = dist[0] + dist[1];
459 int total_shifts = 7;
461 weight[0] = dist[0] ? (total_dist_x - dist[0]) * 256 / total_dist_x : 0;
462 weight[1] = 256 - weight[0];
467 if (
int total_dist_y = dist[2] + dist[3]; total_dist_y) {
468 weight[2] = dist[2] ? (total_dist_y - dist[2]) * 256 / total_dist_y : 0;
469 weight[3] = 256 - weight[2];
474 for (
int i = 0; i < 4; i++)
476 total_pixel += values[i] * weight[i];
478 total_pixel >>= total_shifts;
479 img(
y,
x + component) =
clampBits(total_pixel, 16);
482 if (
cpp > 1 && component == 0)
483 for (
int i = 1; i <
cpp; i++)
491 if (
table->ntables == 1) {
494 const auto t =
table->getTable(0);
495 for (
int y = start_y;
y < end_y;
y++) {
497 for (
int x = 0;
x < gw;
x++) {
501 v = 15700 * (v & 65535) + (v >> 16);
502 uint32_t pix = base + ((delta * (v & 2047) + 1024) >> 12);
510 const auto t =
table->getTable(0);
511 for (
int y = start_y;
y < end_y;
y++) {
512 for (
int x = 0;
x < gw;
x++) {
513 img(
y,
x) = t(img(
y,
x));
518 ThrowRDE(
"Table lookup with multiple components not implemented");
assert(dim.area() >=area)
static Array2DRef< T > create(std::vector< cvless_value_type, AllocatorType > &storage, int width, int height)
static bool RAWSPEED_READNONE SSE2()
std::array< int, 4 > blackLevelSeparateStorage
Optional< Array2DRef< int > > blackLevelSeparate
Optional< int > whitePoint
std::vector< BlackArea > blackAreas
Array2DRef< uint16_t > getU16DataAsUncroppedArray2DRef() noexcept
std::vector< uint8_t, AlignedAllocator< uint8_t, 16 > > mBadPixelMap
void startWorker(RawImageWorker::RawImageWorkerTask task, bool cropped)
CroppedArray2DRef< uint16_t > getU16DataAsCroppedArray2DRef() noexcept
std::unique_ptr< TableLookUp > table
uint32_t mBadPixelMapPitch
void scaleBlackWhite() override
void scaleValues_plain(int start_y, int end_y)
void calculateBlackAreas() override
void scaleValues(int start_y, int end_y) override
void doLookup(int start_y, int end_y) override
void fixBadPixel(uint32_t x, uint32_t y, int component=0) override
constexpr RAWSPEED_READNONE Ttgt implicit_cast(Tsrc value)
constexpr uint64_t RAWSPEED_READNONE roundDown(uint64_t value, uint64_t multiple)
void writeLog(DEBUG_PRIO priority, const char *format,...)
Array2DRef(Array1DRef< T > data, int width, int height, int pitch) -> Array2DRef< T >
constexpr auto RAWSPEED_READNONE clampBits(T value, unsigned int nBits)