1 /*
2  *  This file is part of RawTherapee.
3  *
4  *  Copyright (c) 2004-2010 Gabor Horvath <hgabor@rawtherapee.com>
5  *
6  *  RawTherapee is free software: you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation, either version 3 of the License, or
9  *  (at your option) any later version.
10  *
11  *  RawTherapee is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with RawTherapee.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 #include <tiffio.h>
20 #include "imagefloat.h"
21 #include "image16.h"
22 #include "image8.h"
23 #include <cstring>
24 #include "rtengine.h"
25 #include "mytime.h"
26 #include "iccstore.h"
27 #include "alignedbuffer.h"
28 #include "rt_math.h"
29 #include "color.h"
30 #include "halffloat.h"
31 #include "sleef.h"
32 
33 namespace rtengine {
34 
Imagefloat()35 Imagefloat::Imagefloat():
36     color_space_("sRGB"),
37     mode_(Mode::RGB)
38 {
39     ws_[0][0] = RT_INFINITY_F;
40     iws_[0][0] = RT_INFINITY_F;
41 }
42 
Imagefloat(int w,int h,const Imagefloat * state_from)43 Imagefloat::Imagefloat(int w, int h, const Imagefloat *state_from):
44     color_space_("sRGB"),
45     mode_(Mode::RGB)
46 {
47     allocate(w, h);
48     ws_[0][0] = RT_INFINITY_F;
49     iws_[0][0] = RT_INFINITY_F;
50     if (state_from) {
51         state_from->copyState(this);
52     }
53 }
54 
~Imagefloat()55 Imagefloat::~Imagefloat ()
56 {
57 }
58 
59 // Call this method to handle floating points input values of different size
setScanline(int row,unsigned char * buffer,int bps,unsigned int numSamples)60 void Imagefloat::setScanline (int row, unsigned char* buffer, int bps, unsigned int numSamples)
61 {
62     if (data == nullptr) {
63         return;
64     }
65 
66     // The DNG decoder convert to 32 bits float data even if the file contains 16 or 24 bits data.
67     // DNG_HalfToFloat and DNG_FP24ToFloat from dcraw.cc can be used to manually convert
68     // from 16 and 24 bits to 32 bits float respectively
69     switch (sampleFormat) {
70     case (IIOSF_FLOAT16): {
71         int ix = 0;
72         uint16_t* sbuffer = (uint16_t*) buffer;
73 
74         for (int i = 0; i < width; i++) {
75             r(row, i) = 65535.f * DNG_HalfToFloat(sbuffer[ix++]);
76             g(row, i) = 65535.f * DNG_HalfToFloat(sbuffer[ix++]);
77             b(row, i) = 65535.f * DNG_HalfToFloat(sbuffer[ix++]);
78         }
79 
80         break;
81     }
82     //case (IIOSF_FLOAT24):
83     case (IIOSF_FLOAT32): {
84         int ix = 0;
85         float* sbuffer = (float*) buffer;
86 
87         for (int i = 0; i < width; i++) {
88             r(row, i) = 65535.f * sbuffer[ix++];
89             g(row, i) = 65535.f * sbuffer[ix++];
90             b(row, i) = 65535.f * sbuffer[ix++];
91         }
92 
93         break;
94     }
95 
96     case (IIOSF_LOGLUV24):
97     case (IIOSF_LOGLUV32): {
98         int ix = 0;
99         float* sbuffer = (float*) buffer;
100         float xyzvalues[3], rgbvalues[3];
101 
102         for (int i = 0; i < width; i++) {
103             xyzvalues[0] = sbuffer[ix++];
104             xyzvalues[1] = sbuffer[ix++];
105             xyzvalues[2] = sbuffer[ix++];
106             // TODO: we may have to handle other color space than sRGB!
107             Color::xyz2srgb(xyzvalues[0], xyzvalues[1], xyzvalues[2], rgbvalues[0], rgbvalues[1], rgbvalues[2]);
108             r(row, i) = rgbvalues[0];
109             g(row, i) = rgbvalues[1];
110             b(row, i) = rgbvalues[2];
111         }
112 
113         break;
114     }
115 
116     default:
117         // Other type are ignored, but could be implemented if necessary
118         break;
119     }
120 }
121 
122 
getScanline(int row,unsigned char * buffer,int bps,bool isFloat) const123 void Imagefloat::getScanline (int row, unsigned char* buffer, int bps, bool isFloat) const
124 {
125 
126     if (data == nullptr) {
127         return;
128     }
129 
130     if (isFloat) {
131         if (bps == 32) {
132             int ix = 0;
133             float* sbuffer = (float*) buffer;
134             // agriggio -- assume the image is normalized to [0, 65535]
135             for (int i = 0; i < width; i++) {
136                 sbuffer[ix++] = r(row, i) / 65535.f;
137                 sbuffer[ix++] = g(row, i) / 65535.f;
138                 sbuffer[ix++] = b(row, i) / 65535.f;
139             }
140         } else if (bps == 16) {
141             int ix = 0;
142             uint16_t* sbuffer = (uint16_t*) buffer;
143             // agriggio -- assume the image is normalized to [0, 65535]
144             for (int i = 0; i < width; i++) {
145                 sbuffer[ix++] = DNG_FloatToHalf(r(row, i) / 65535.f);
146                 sbuffer[ix++] = DNG_FloatToHalf(g(row, i) / 65535.f);
147                 sbuffer[ix++] = DNG_FloatToHalf(b(row, i) / 65535.f);
148             }
149         }
150     } else {
151         unsigned short *sbuffer = (unsigned short *)buffer;
152         for (int i = 0, ix = 0; i < width; i++) {
153             float ri = r(row, i);
154             float gi = g(row, i);
155             float bi = b(row, i);
156             if (bps == 16) {
157                 sbuffer[ix++] = CLIP(ri);
158                 sbuffer[ix++] = CLIP(gi);
159                 sbuffer[ix++] = CLIP(bi);
160             } else if (bps == 8) {
161                 buffer[ix++] = rtengine::uint16ToUint8Rounded(CLIP(ri));
162                 buffer[ix++] = rtengine::uint16ToUint8Rounded(CLIP(gi));
163                 buffer[ix++] = rtengine::uint16ToUint8Rounded(CLIP(bi));
164             }
165         }
166     }
167 }
168 
copy() const169 Imagefloat *Imagefloat::copy() const
170 {
171     Imagefloat* cp = new Imagefloat(width, height);
172     copyTo(cp);
173     return cp;
174 }
175 
176 
copyTo(Imagefloat * dst) const177 void Imagefloat::copyTo(Imagefloat *dst) const
178 {
179     copyData(dst);
180     copyState(dst);
181 }
182 
183 
copyState(Imagefloat * to) const184 void Imagefloat::copyState(Imagefloat *to) const
185 {
186     to->color_space_ = color_space_;
187     to->mode_ = mode_;
188     to->ws_[0][0] = RT_INFINITY_F;
189     to->iws_[0][0] = RT_INFINITY_F;
190 }
191 
192 
193 // This is called by the StdImageSource class. We assume that fp images from StdImageSource don't have to deal with gamma
getStdImage(const ColorTemp & ctemp,int tran,Imagefloat * image,PreviewProps pp) const194 void Imagefloat::getStdImage (const ColorTemp &ctemp, int tran, Imagefloat* image, PreviewProps pp) const
195 {
196 
197     // compute channel multipliers
198     float rm = 1.f, gm = 1.f, bm = 1.f;
199     if (ctemp.getTemp() >= 0) {
200         double drm, dgm, dbm;
201         ctemp.getMultipliers (drm, dgm, dbm);
202         rm = drm;
203         gm = dgm;
204         bm = dbm;
205 
206         rm = 1.0 / rm;
207         gm = 1.0 / gm;
208         bm = 1.0 / bm;
209         float mul_lum = 0.299 * rm + 0.587 * gm + 0.114 * bm;
210         rm /= mul_lum;
211         gm /= mul_lum;
212         bm /= mul_lum;
213     }
214 
215     int sx1, sy1, sx2, sy2;
216 
217     transform (pp, tran, sx1, sy1, sx2, sy2);
218 
219     int imwidth = image->width; // Destination image
220     int imheight = image->height; // Destination image
221 
222     if (((tran & TR_ROT) == TR_R90) || ((tran & TR_ROT) == TR_R270)) {
223         int swap = imwidth;
224         imwidth = imheight;
225         imheight = swap;
226     }
227 
228     int maxx = width; // Source image
229     int maxy = height; // Source image
230     int mtran = tran & TR_ROT;
231     int skip = pp.getSkip();
232 
233     // improve speed by integrating the area division into the multipliers
234     // switched to using ints for the red/green/blue channel buffer.
235     // Incidentally this improves accuracy too.
236     float area = skip * skip;
237     float rm2 = rm;
238     float gm2 = gm;
239     float bm2 = bm;
240     rm /= area;
241     gm /= area;
242     bm /= area;
243 
244     const auto CLIP0 = [](float v) -> float { return std::max(v, 0.f); };
245 
246 #ifdef _OPENMP
247     #pragma omp parallel
248     {
249 #endif
250         AlignedBuffer<float> abR(imwidth);
251         AlignedBuffer<float> abG(imwidth);
252         AlignedBuffer<float> abB(imwidth);
253         float *lineR  = abR.data;
254         float *lineG  = abG.data;
255         float *lineB =  abB.data;
256 
257 #ifdef _OPENMP
258         #pragma omp for
259 #endif
260 
261         for (int iy = 0; iy < imheight; iy++) {
262             if (skip == 1) {
263                 // special case (speedup for 1:1 scale)
264                 // i: source image, first line of the current destination row
265                 int src_y = sy1 + iy;
266 
267                 // overflow security check, not sure that it's necessary
268                 if (src_y >= maxy) {
269                     continue;
270                 }
271 
272                 for (int dst_x = 0, src_x = sx1; dst_x < imwidth; dst_x++, src_x++) {
273                     // overflow security check, not sure that it's necessary
274                     if (src_x >= maxx) {
275                         continue;
276                     }
277 
278                     lineR[dst_x] = CLIP0(rm2 * r(src_y, src_x));
279                     lineG[dst_x] = CLIP0(gm2 * g(src_y, src_x));
280                     lineB[dst_x] = CLIP0(bm2 * b(src_y, src_x));
281                 }
282             } else {
283                 // source image, first line of the current destination row
284                 int src_y = sy1 + skip * iy;
285 
286                 if (src_y >= maxy) {
287                     continue;
288                 }
289 
290                 for (int dst_x = 0, src_x = sx1; dst_x < imwidth; dst_x++, src_x += skip) {
291                     if (src_x >= maxx) {
292                         continue;
293                     }
294 
295                     int src_sub_width = MIN(maxx - src_x, skip);
296                     int src_sub_height = MIN(maxy - src_y, skip);
297 
298                     float rtot, gtot, btot; // RGB accumulators
299                     rtot = gtot = btot = 0.;
300 
301                     for (int src_sub_y = 0; src_sub_y < src_sub_height; src_sub_y++)
302                         for (int src_sub_x = 0; src_sub_x < src_sub_width; src_sub_x++) {
303                             rtot += r(src_y + src_sub_y, src_x + src_sub_x);
304                             gtot += g(src_y + src_sub_y, src_x + src_sub_x);
305                             btot += b(src_y + src_sub_y, src_x + src_sub_x);
306                         }
307 
308                     // convert back to gamma and clip
309                     if (src_sub_width == skip && src_sub_height == skip) {
310                         // Common case where the sub-region is complete
311                         lineR[dst_x] = CLIP0(rm * rtot);
312                         lineG[dst_x] = CLIP0(gm * gtot);
313                         lineB[dst_x] = CLIP0(bm * btot);
314                     } else {
315                         // computing a special factor for this incomplete sub-region
316                         float area = src_sub_width * src_sub_height;
317                         lineR[dst_x] = CLIP0(rm2 * rtot / area);
318                         lineG[dst_x] = CLIP0(gm2 * gtot / area);
319                         lineB[dst_x] = CLIP0(bm2 * btot / area);
320                     }
321                 }
322             }
323 
324             if      (mtran == TR_NONE)
325                 for (int dst_x = 0, src_x = sx1; dst_x < imwidth; dst_x++, src_x += skip) {
326                     image->r(iy, dst_x) = lineR[dst_x];
327                     image->g(iy, dst_x) = lineG[dst_x];
328                     image->b(iy, dst_x) = lineB[dst_x];
329                 }
330             else if (mtran == TR_R180)
331                 for (int dst_x = 0; dst_x < imwidth; dst_x++) {
332                     image->r(imheight - 1 - iy, imwidth - 1 - dst_x) = lineR[dst_x];
333                     image->g(imheight - 1 - iy, imwidth - 1 - dst_x) = lineG[dst_x];
334                     image->b(imheight - 1 - iy, imwidth - 1 - dst_x) = lineB[dst_x];
335                 }
336             else if (mtran == TR_R90)
337                 for (int dst_x = 0, src_x = sx1; dst_x < imwidth; dst_x++, src_x += skip) {
338                     image->r(dst_x, imheight - 1 - iy) = lineR[dst_x];
339                     image->g(dst_x, imheight - 1 - iy) = lineG[dst_x];
340                     image->b(dst_x, imheight - 1 - iy) = lineB[dst_x];
341                 }
342             else if (mtran == TR_R270)
343                 for (int dst_x = 0, src_x = sx1; dst_x < imwidth; dst_x++, src_x += skip) {
344                     image->r(imwidth - 1 - dst_x, iy) = lineR[dst_x];
345                     image->g(imwidth - 1 - dst_x, iy) = lineG[dst_x];
346                     image->b(imwidth - 1 - dst_x, iy) = lineB[dst_x];
347                 }
348         }
349 
350 #ifdef _OPENMP
351     }
352 #endif
353 }
354 
355 Image8*
to8() const356 Imagefloat::to8() const
357 {
358     Image8* img8 = new Image8(width, height);
359 #ifdef _OPENMP
360     #pragma omp parallel for schedule(static)
361 #endif
362 
363     for (int h = 0; h < height; ++h) {
364         for (int w = 0; w < width; ++w) {
365             img8->r(h, w) = uint16ToUint8Rounded(CLIP(r(h, w)));
366             img8->g(h, w) = uint16ToUint8Rounded(CLIP(g(h, w)));
367             img8->b(h, w) = uint16ToUint8Rounded(CLIP(b(h, w)));
368         }
369     }
370 
371     return img8;
372 }
373 
374 Image16*
to16() const375 Imagefloat::to16() const
376 {
377     Image16* img16 = new Image16(width, height);
378 #ifdef _OPENMP
379     #pragma omp parallel for schedule(static)
380 #endif
381 
382     for (int h = 0; h < height; ++h) {
383         for (int w = 0; w < width; ++w) {
384             img16->r(h, w) = CLIP(r(h, w));
385             img16->g(h, w) = CLIP(g(h, w));
386             img16->b(h, w) = CLIP(b(h, w));
387         }
388     }
389 
390     return img16;
391 }
392 
393 
multiply(float factor,bool multithread)394 void Imagefloat::multiply(float factor, bool multithread)
395 {
396     const int W = width;
397     const int H = height;
398 #ifdef __SSE2__
399     vfloat vfactor = F2V(factor);
400 #endif
401 
402 #ifdef _OPENMP
403 #   pragma omp parallel for firstprivate(W, H) schedule(dynamic, 5) if (multithread)
404 #endif
405     for (int y = 0; y < H; y++) {
406         int x = 0;
407 #ifdef __SSE2__
408         for (; x < W-3; x += 4) {
409             vfloat rv = LVF(r(y, x));
410             vfloat gv = LVF(g(y, x));
411             vfloat bv = LVF(b(y, x));
412             STVF(r(y, x), rv * vfactor);
413             STVF(g(y, x), gv * vfactor);
414             STVF(b(y, x), bv * vfactor);
415         }
416 #endif
417         for (; x < W; ++x) {
418             r(y, x) *= factor;
419             g(y, x) *= factor;
420             b(y, x) *= factor;
421         }
422     }
423 }
424 
425 
426 // convert values's range to [0;1] ; this method assumes that the input values's range is [0;65535]
normalizeFloatTo1(bool multithread)427 void Imagefloat::normalizeFloatTo1(bool multithread)
428 {
429     multiply(1.f/65535.f, multithread);
430 }
431 
432 // convert values's range to [0;65535 ; this method assumes that the input values's range is [0;1]
normalizeFloatTo65535(bool multithread)433 void Imagefloat::normalizeFloatTo65535(bool multithread)
434 {
435     multiply(65535.f, multithread);
436 }
437 
calcCroppedHistogram(const ProcParams & params,float scale,LUTu & hist)438 void Imagefloat::calcCroppedHistogram(const ProcParams &params, float scale, LUTu & hist)
439 {
440 
441     hist.clear();
442 
443     // Set up factors to calc the lightness
444     TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix (params.icm.workingProfile);
445 
446     float facRed   = wprof[1][0];
447     float facGreen = wprof[1][1];
448     float facBlue  = wprof[1][2];
449 
450 
451     // calc pixel size
452     int x1, x2, y1, y2;
453     params.crop.mapToResized(width, height, scale, x1, x2, y1, y2);
454 
455 #ifdef _OPENMP
456     #pragma omp parallel
457 #endif
458     {
459         LUTu histThr(65536);
460         histThr.clear();
461 #ifdef _OPENMP
462         #pragma omp for nowait
463 #endif
464 
465         for (int y = y1; y < y2; y++) {
466             for (int x = x1; x < x2; x++) {
467                 int i = (int)(facRed * r(y, x) + facGreen * g(y, x) + facBlue * b(y, x));
468 
469                 if (i < 0) {
470                     i = 0;
471                 } else if (i > 65535) {
472                     i = 65535;
473                 }
474 
475                 histThr[i]++;
476             }
477         }
478 
479 #ifdef _OPENMP
480         #pragma omp critical
481 #endif
482         {
483             for(int i = 0; i <= 0xffff; i++) {
484                 hist[i] += histThr[i];
485             }
486         }
487     }
488 
489 }
490 
491 // Parallelized transformation; create transform with cmsFLAGS_NOCACHE!
ExecCMSTransform(cmsHTRANSFORM hTransform)492 void Imagefloat::ExecCMSTransform(cmsHTRANSFORM hTransform)
493 {
494 
495     // LittleCMS cannot parallelize planar setups -- Hombre: LCMS2.4 can! But it we use this new feature, memory allocation
496     // have to be modified too to build temporary buffers that allow multi processor execution
497 #ifdef _OPENMP
498     #pragma omp parallel
499 #endif
500     {
501         AlignedBuffer<float> pBuf(width * 3);
502 
503 #ifdef _OPENMP
504         #pragma omp for schedule(dynamic, 16)
505 #endif
506 
507         for (int y = 0; y < height; y++)
508         {
509             float *p = pBuf.data, *pR = r(y), *pG = g(y), *pB = b(y);
510 
511             for (int x = 0; x < width; x++) {
512                 *(p++) = *(pR++);
513                 *(p++) = *(pG++);
514                 *(p++) = *(pB++);
515             }
516 
517             cmsDoTransform (hTransform, pBuf.data, pBuf.data, width);
518 
519             p = pBuf.data;
520             pR = r(y);
521             pG = g(y);
522             pB = b(y);
523 
524             for (int x = 0; x < width; x++) {
525                 *(pR++) = *(p++);
526                 *(pG++) = *(p++);
527                 *(pB++) = *(p++);
528             }
529         } // End of parallelization
530     }
531 }
532 
533 // Parallelized transformation; create transform with cmsFLAGS_NOCACHE!
ExecCMSTransform(cmsHTRANSFORM hTransform,const Imagefloat * src)534 void Imagefloat::ExecCMSTransform(cmsHTRANSFORM hTransform, const Imagefloat *src)
535 {
536     mode_ = Mode::RGB;
537     constexpr int cx = 0, cy = 0;
538 
539     // LittleCMS cannot parallelize planar Lab float images
540     // so build temporary buffers to allow multi processor execution
541 #ifdef _OPENMP
542     #pragma omp parallel
543 #endif
544     {
545         AlignedBuffer<float> bufferSrc(width * 3);
546         AlignedBuffer<float> bufferRGB(width * 3);
547 
548 #ifdef _OPENMP
549         #pragma omp for schedule(static)
550 #endif
551 
552         for (int y = cy; y < cy + height; y++)
553         {
554             float *pRGB, *pR, *pG, *pB;
555             float *pSrc, *psR, *psG, *psB;
556 
557             pSrc = bufferSrc.data;
558             psR = src->r(y) + cx;
559             psG = src->g(y) + cx;
560             psB = src->b(y) + cx;
561 
562             for (int x = 0; x < width; x++) {
563                 *(pSrc++) = *(psR++) / 65535.f;
564                 *(pSrc++) = *(psG++) / 65535.f;
565                 *(pSrc++) = *(psB++) / 65535.f;
566             }
567 
568             cmsDoTransform(hTransform, bufferSrc.data, bufferRGB.data, width);
569 
570             pRGB = bufferRGB.data;
571             pR = r(y - cy);
572             pG = g(y - cy);
573             pB = b(y - cy);
574 
575             for (int x = 0; x < width; x++) {
576                 *(pR++) = *(pRGB++) * 65535.f;
577                 *(pG++) = *(pRGB++) * 65535.f;
578                 *(pB++) = *(pRGB++) * 65535.f;
579             }
580         } // End of parallelization
581     }
582 }
583 
584 
assignColorSpace(const Glib::ustring & space)585 void Imagefloat::assignColorSpace(const Glib::ustring &space)
586 {
587     if (color_space_ != space) {
588         color_space_ = space;
589         ws_[0][0] = RT_INFINITY_F;
590         iws_[0][0] = RT_INFINITY_F;
591     }
592 }
593 
594 
get_ws()595 inline void Imagefloat::get_ws()
596 {
597     if (!std::isfinite(ws_[0][0])) {
598         TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix(color_space_);
599         TMatrix iws = ICCStore::getInstance()->workingSpaceInverseMatrix(color_space_);
600         for (int i = 0; i < 3; ++i) {
601             for (int j = 0; j < 3; ++j) {
602                 ws_[i][j] = float(ws[i][j]);
603                 iws_[i][j] = float(iws[i][j]);
604             }
605         }
606 #ifdef __SSE2__
607         for (int i = 0; i < 3; ++i) {
608             for (int j = 0; j < 3; ++j) {
609                 vws_[i][j] = F2V(float(ws[i][j]));
610                 viws_[i][j] = F2V(float(iws[i][j]));
611             }
612         }
613 #endif
614     }
615 }
616 
617 
setMode(Mode mode,bool multithread)618 void Imagefloat::setMode(Mode mode, bool multithread)
619 {
620     if (mode == this->mode()) {
621         return;
622     }
623 
624     switch (this->mode()) {
625     case Mode::RGB:
626         if (mode == Mode::XYZ) {
627             rgb_to_xyz(multithread);
628         } else if (mode == Mode::YUV) {
629             rgb_to_yuv(multithread);
630         } else {
631             rgb_to_lab(multithread);
632         }
633         break;
634     case Mode::XYZ:
635         if (mode == Mode::RGB) {
636             xyz_to_rgb(multithread);
637         } else if (mode == Mode::YUV) {
638             xyz_to_yuv(multithread);
639         } else {
640             xyz_to_lab(multithread);
641         }
642         break;
643     case Mode::YUV:
644         if (mode == Mode::RGB) {
645             yuv_to_rgb(multithread);
646         } else if (mode == Mode::XYZ) {
647             yuv_to_xyz(multithread);
648         } else {
649             yuv_to_lab(multithread);
650         }
651         break;
652     case Mode::LAB:
653         if (mode == Mode::RGB) {
654             lab_to_rgb(multithread);
655         } else if (mode == Mode::XYZ) {
656             lab_to_xyz(multithread);
657         } else {
658             lab_to_yuv(multithread);
659         }
660     }
661 
662     mode_ = mode;
663 }
664 
665 
rgb_to_xyz(bool multithread)666 void Imagefloat::rgb_to_xyz(bool multithread)
667 {
668     get_ws();
669 
670 #ifdef _OPENMP
671 #   pragma omp parallel for if (multithread)
672 #endif
673     for (int y = 0; y < height; ++y) {
674         int x = 0;
675 #ifdef __SSE2__
676         vfloat Xv, Yv, Zv;
677         for (; x < width-3; x += 4) {
678             vfloat rv = LVF(r(y, x));
679             vfloat gv = LVF(g(y, x));
680             vfloat bv = LVF(b(y, x));
681             Color::rgbxyz(rv, gv, bv, Xv, Yv, Zv, vws_);
682             STVF(r(y, x), Xv);
683             STVF(g(y, x), Yv);
684             STVF(b(y, x), Zv);
685         }
686 #endif
687         for (; x < width; ++x) {
688             float X, Y, Z;
689             Color::rgbxyz(r(y, x), g(y, x), b(y, x), X, Y, Z, ws_);
690             r(y, x) = X;
691             g(y, x) = Y;
692             b(y, x) = Z;
693         }
694     }
695 }
696 
697 
rgb_to_yuv(bool multithread)698 void Imagefloat::rgb_to_yuv(bool multithread)
699 {
700     get_ws();
701 
702 #ifdef _OPENMP
703 #   pragma omp parallel for if (multithread)
704 #endif
705     for (int y = 0; y < height; ++y) {
706         int x = 0;
707 #ifdef __SSE2__
708         vfloat Yv, uv, vv;
709         for (; x < width-3; x += 4) {
710             vfloat rv = LVF(r(y, x));
711             vfloat gv = LVF(g(y, x));
712             vfloat bv = LVF(b(y, x));
713             Color::rgb2yuv(rv, gv, bv, Yv, uv, vv, vws_);
714             STVF(g(y, x), Yv);
715             STVF(b(y, x), uv);
716             STVF(r(y, x), vv);
717         }
718 #endif
719         for (; x < width; ++x) {
720             Color::rgb2yuv(r(y, x), g(y, x), b(y, x), g(y, x), b(y, x), r(y, x), ws_);
721         }
722     }
723 }
724 
725 
xyz_to_rgb(bool multithread)726 void Imagefloat::xyz_to_rgb(bool multithread)
727 {
728     get_ws();
729 
730 #ifdef _OPENMP
731 #   pragma omp parallel for if (multithread)
732 #endif
733     for (int y = 0; y < height; ++y) {
734         int x = 0;
735 #ifdef __SSE2__
736         vfloat Rv, Gv, Bv;
737         for (; x < width-3; x += 4) {
738             vfloat xv = LVF(r(y, x));
739             vfloat yv = LVF(g(y, x));
740             vfloat zv = LVF(b(y, x));
741             Color::xyz2rgb(xv, yv, zv, Rv, Gv, Bv, viws_);
742             STVF(r(y, x), Rv);
743             STVF(g(y, x), Gv);
744             STVF(b(y, x), Bv);
745         }
746 #endif
747         for (; x < width; ++x) {
748             float R, G, B;
749             Color::xyz2rgb(r(y, x), g(y, x), b(y, x), R, G, B, iws_);
750             r(y, x) = R;
751             g(y, x) = G;
752             b(y, x) = B;
753         }
754     }
755 }
756 
757 
xyz_to_yuv(bool multithread)758 void Imagefloat::xyz_to_yuv(bool multithread)
759 {
760     get_ws();
761 
762 #ifdef _OPENMP
763 #   pragma omp parallel for if (multithread)
764 #endif
765     for (int y = 0; y < height; ++y) { // TODO - SSE2 optimization
766         for (int x = 0; x < width; ++x) {
767             float R, G, B;
768             float Y = g(y, x);
769             Color::xyz2rgb(r(y, x), Y, b(y, x), R, G, B, iws_);
770             r(y, x) = R - Y;
771             b(y, x) = Y - B;
772         }
773     }
774 }
775 
776 
yuv_to_rgb(bool multithread)777 void Imagefloat::yuv_to_rgb(bool multithread)
778 {
779     get_ws();
780 
781 #ifdef _OPENMP
782 #   pragma omp parallel for if (multithread)
783 #endif
784     for (int y = 0; y < height; ++y) {
785         int x = 0;
786 #ifdef __SSE2__
787         vfloat Rv, Gv, Bv;
788         for (; x < width-3; x += 4) {
789             vfloat Yv = LVF(g(y, x));
790             vfloat uv = LVF(b(y, x));
791             vfloat vv = LVF(r(y, x));
792             Color::yuv2rgb(Yv, uv, vv, Rv, Gv, Bv, vws_);
793             STVF(r(y, x), Rv);
794             STVF(g(y, x), Gv);
795             STVF(b(y, x), Bv);
796         }
797 #endif
798         for (; x < width; ++x) {
799             Color::yuv2rgb(g(y, x), b(y, x), r(y, x), r(y, x), g(y, x), b(y, x), ws_);
800         }
801     }
802 }
803 
804 
yuv_to_xyz(bool multithread)805 void Imagefloat::yuv_to_xyz(bool multithread)
806 {
807     get_ws();
808 
809 #ifdef _OPENMP
810 #   pragma omp parallel for if (multithread)
811 #endif
812     for (int y = 0; y < height; ++y) { // TODO - SSE2 optimization
813         for (int x = 0; x < width; ++x) {
814             float R, G, B;
815             Color::yuv2rgb(g(y, x), b(y, x), r(y, x), R, G, B, ws_);
816             Color::rgbxyz(R, G, B, r(y, x), g(y, x), b(y, x), ws_);
817         }
818     }
819 }
820 
821 
toLab(LabImage & dst,bool multithread)822 void Imagefloat::toLab(LabImage &dst, bool multithread)
823 {
824     setMode(Mode::LAB, multithread);
825 
826 #ifdef _OPENMP
827 #   pragma omp parallel for if (multithread)
828 #endif
829     for (int y = 0; y < height; ++y) {
830         for (int x = 0; x < width; ++x) {
831             dst.L[y][x] = g(y, x);
832             dst.a[y][x] = r(y, x);
833             dst.b[y][x] = b(y, x);
834         }
835     }
836 }
837 
838 
rgb_to_lab(bool multithread)839 void Imagefloat::rgb_to_lab(bool multithread)
840 {
841     get_ws();
842 
843 #ifdef _OPENMP
844 #   pragma omp parallel for if (multithread)
845 #endif
846     for (int y = 0; y < height; ++y) {
847         int x = 0;
848 #ifdef __SSE2__
849         vfloat Rv, Gv, Bv;
850         vfloat Lv, av, bv;
851         for (; x < width-3; x += 4) {
852             Rv = LVF(r(y, x));
853             Gv = LVF(g(y, x));
854             Bv = LVF(b(y, x));
855             Color::rgb2lab(Rv, Gv, Bv, Lv, av, bv, vws_);
856             STVF(g(y, x), Lv);
857             STVF(r(y, x), av);
858             STVF(b(y, x), bv);
859         }
860 #endif
861         for (; x < width; ++x) {
862             rgb_to_lab(y, x, g(y, x), r(y, x), b(y, x));
863         }
864     }
865 }
866 
867 
rgb_to_lab(int y,int x,float & L,float & a,float & b)868 inline void Imagefloat::rgb_to_lab(int y, int x, float &L, float &a, float &b)
869 {
870     float X, Y, Z;
871     Color::rgbxyz(this->r(y, x), this->g(y, x), this->b(y, x), X, Y, Z, ws_);
872     Color::XYZ2Lab(X, Y, Z, L, a, b);
873 }
874 
875 
xyz_to_lab(bool multithread)876 void Imagefloat::xyz_to_lab(bool multithread)
877 {
878     get_ws();
879 
880 #ifdef _OPENMP
881 #   pragma omp parallel for if (multithread)
882 #endif
883     for (int y = 0; y < height; ++y) {
884         for (int x = 0; x < width; ++x) {
885             xyz_to_lab(y, x, g(y, x), r(y, x), b(y, x));
886         }
887     }
888 }
889 
890 
xyz_to_lab(int y,int x,float & L,float & a,float & b)891 inline void Imagefloat::xyz_to_lab(int y, int x, float &L, float &a, float &b)
892 {
893     Color::XYZ2Lab(this->r(y, x), this->g(y, x), this->b(y, x), L, a, b);
894 }
895 
896 
yuv_to_lab(bool multithread)897 void Imagefloat::yuv_to_lab(bool multithread)
898 {
899     get_ws();
900 
901 #ifdef _OPENMP
902 #   pragma omp parallel for if (multithread)
903 #endif
904     for (int y = 0; y < height; ++y) {
905         int x = 0;
906 #ifdef __SSE2__
907         vfloat Rv, Gv, Bv;
908         vfloat Xv, Yv, Zv;
909         vfloat Lv, av, bv;
910         for (; x < width-3; x += 4) {
911             Rv = LVF(r(y, x));
912             Gv = LVF(g(y, x));
913             Bv = LVF(b(y, x));
914             Color::yuv2rgb(Gv, Bv, Rv, Rv, Gv, Bv, vws_);
915             Color::rgbxyz(Rv, Gv, Bv, Xv, Yv, Zv, vws_);
916             Color::XYZ2Lab(Xv, Yv, Zv, Lv, av, bv);
917             STVF(g(y, x), Lv);
918             STVF(r(y, x), av);
919             STVF(b(y, x), bv);
920         }
921 #endif
922         for (; x < width; ++x) {
923             yuv_to_lab(y, x, g(y, x), r(y, x), b(y, x));
924         }
925     }
926 }
927 
928 
yuv_to_lab(int y,int x,float & L,float & a,float & b)929 inline void Imagefloat::yuv_to_lab(int y, int x, float &L, float &a, float &b)
930 {
931     float R, G, B;
932     float X, Y, Z;
933     Color::yuv2rgb(this->g(y, x), this->b(y, x), this->r(y, x), R, G, B, ws_);
934     Color::rgbxyz(R, G, B, X, Y, Z, ws_);
935     Color::XYZ2Lab(X, Y, Z, L, a, b);
936 }
937 
938 
lab_to_rgb(bool multithread)939 void Imagefloat::lab_to_rgb(bool multithread)
940 {
941     get_ws();
942 
943 #ifdef _OPENMP
944 #   pragma omp parallel for if (multithread)
945 #endif
946     for (int y = 0; y < height; ++y) {
947         // float X, Y, Z;
948         int x = 0;
949 #ifdef __SSE2__
950         vfloat Lv, av, bv;
951         vfloat Rv, Gv, Bv;
952         for (; x < width-3; x += 4) {
953             Lv = LVF(g(y, x));
954             av = LVF(r(y, x));
955             bv = LVF(b(y, x));
956             Color::lab2rgb(Lv, av, bv, Rv, Gv, Bv, viws_);
957             STVF(r(y, x), Rv);
958             STVF(g(y, x), Gv);
959             STVF(b(y, x), Bv);
960         }
961 #endif
962         for (; x < width; ++x) {
963             Color::lab2rgb(this->g(y, x), this->r(y, x), this->b(y, x), this->r(y, x), this->g(y, x), this->b(y, x), iws_);
964             // Color::Lab2XYZ(this->g(y, x), this->r(y, x), this->b(y, x), X, Y, Z);
965             // Color::xyz2rgb(X, Y, Z, this->r(y, x), this->g(y, x), this->b(y, x), iws_);
966         }
967     }
968 }
969 
970 
lab_to_xyz(bool multithread)971 void Imagefloat::lab_to_xyz(bool multithread)
972 {
973     get_ws();
974 
975 #ifdef _OPENMP
976 #   pragma omp parallel for if (multithread)
977 #endif
978     for (int y = 0; y < height; ++y) {
979         for (int x = 0; x < width; ++x) {
980             Color::Lab2XYZ(this->g(y, x), this->r(y, x), this->b(y, x), this->r(y, x), this->g(y, x), this->b(y, x));
981         }
982     }
983 }
984 
985 
lab_to_yuv(bool multithread)986 void Imagefloat::lab_to_yuv(bool multithread)
987 {
988     get_ws();
989 
990 #ifdef _OPENMP
991 #   pragma omp parallel for if (multithread)
992 #endif
993     for (int y = 0; y < height; ++y) {
994         float X, Y, Z;
995         float R, G, B;
996         int x = 0;
997 #ifdef __SSE2__
998         vfloat Lv, av, bv;
999         vfloat Rv, Gv, Bv;
1000         vfloat Xv, Yv, Zv;
1001         for (; x < width-3; x += 4) {
1002             Lv = LVF(g(y, x));
1003             av = LVF(r(y, x));
1004             bv = LVF(b(y, x));
1005             Color::Lab2XYZ(Lv, av, bv, Xv, Yv, Zv);
1006             Color::xyz2rgb(Xv, Yv, Zv, Rv, Gv, Bv, viws_);
1007             STVF(g(y, x), Yv);
1008             STVF(b(y, x), Yv - Bv);
1009             STVF(r(y, x), Rv - Yv);
1010         }
1011 #endif
1012         for (; x < width; ++x) {
1013             Color::Lab2XYZ(this->g(y, x), this->r(y, x), this->b(y, x), X, Y, Z);
1014             Color::xyz2rgb(X, Y, Z, R, G, B, iws_);
1015             this->g(y, x) = Y;
1016             this->b(y, x) = Y - B;
1017             this->r(y, x) = R - Y;
1018         }
1019     }
1020 }
1021 
1022 
getLab(int y,int x,float & L,float & a,float & b)1023 void Imagefloat::getLab(int y, int x, float &L, float &a, float &b)
1024 {
1025     get_ws();
1026     switch (mode()) {
1027     case Mode::RGB:
1028         rgb_to_lab(y, x, L, a, b);
1029         break;
1030     case Mode::XYZ:
1031         xyz_to_lab(y, x, L, a, b);
1032         break;
1033     case Mode::YUV:
1034         yuv_to_lab(y, x, L, a, b);
1035         break;
1036     default:
1037         L = this->g(y, x);
1038         a = this->r(y, x);
1039         b = this->b(y, x);
1040         break;
1041     }
1042 }
1043 
1044 } // namespace rtengine
1045