1 /*
2  * * Copyright (C) 2006-2011 Anders Brander <anders@brander.dk>,
3  * * Anders Kvist <akv@lnxbx.dk> and Klaus Post <klauspost@gmail.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
18  */
19 
20 #include "floatplanarimage.h"
21 #include "complexfilter.h"
22 #include <math.h>
23 
24 namespace RawStudio {
25 namespace FFTFilter {
26 
27 float FloatPlanarImage::shortToFloat[65536*4] = {0};
28 
FloatPlanarImage(void)29 FloatPlanarImage::FloatPlanarImage(void) {
30   p = 0;
31   redCorrection = blueCorrection = 1.0f;
32   nPlanes = 0;
33 }
34 
FloatPlanarImage(const FloatPlanarImage & img)35 FloatPlanarImage::FloatPlanarImage( const FloatPlanarImage &img )
36 {
37   nPlanes = img.nPlanes;
38   p = new FloatImagePlane*[nPlanes];
39   for (int i = 0; i < nPlanes; i++)
40     p[i] = new FloatImagePlane(img.p[i]->w, img.p[i]->h, i);
41 
42   bw = img.bw;
43   bh = img.bh;
44   ox = img.ox;
45   oy = img.oy;
46 
47   redCorrection = img.redCorrection;
48   blueCorrection = img.blueCorrection;
49 }
50 
~FloatPlanarImage(void)51 FloatPlanarImage::~FloatPlanarImage(void) {
52   if (p != 0) {
53     for (int i = 0; i < nPlanes; i++) {
54       if (p[i])
55         delete p[i];
56       p[i] = 0;
57     }
58     delete[] p;
59     p = 0;
60   }
61   p = 0;
62   nPlanes = 0;
63 }
64 
allocate_planes()65 void FloatPlanarImage::allocate_planes() {
66   for (int i = 0; i < nPlanes; i++)
67     p[i]->allocateImage();
68 }
69 
mirrorEdges()70 void FloatPlanarImage::mirrorEdges()
71 {
72   for (int i = 0; i < nPlanes; i++)
73     p[i]->mirrorEdges(ox, oy);
74 }
75 
setFilter(int plane,ComplexFilter * f,FFTWindow * window)76 void FloatPlanarImage::setFilter( int plane, ComplexFilter *f, FFTWindow *window )
77 {
78   if (plane >= nPlanes)
79     return;
80   p[plane]->filter = f;
81   p[plane]->window = window;
82 }
83 
84 // TODO: Begs to be SSE2 and/or SMP.
unpackInterleaved(const RS_IMAGE16 * image)85 void FloatPlanarImage::unpackInterleaved( const RS_IMAGE16* image )
86 {
87   // Already demosaiced
88   if (image->channels != 3)
89     return;
90 
91   nPlanes = 3;
92   g_assert(p == 0);
93   p = new FloatImagePlane*[nPlanes];
94 
95   for (int i = 0; i < nPlanes; i++)
96     p[i] = new FloatImagePlane(image->w+ox*2, image->h+oy*2, i);
97 
98   allocate_planes();
99 
100   for (int y = 0; y < image->h; y++ ) {
101     const gushort* pix = GET_PIXEL(image,0,y);
102     gfloat *rp = p[0]->getAt(ox, y+oy);
103     gfloat *gp = p[1]->getAt(ox, y+oy);
104     gfloat *bp = p[2]->getAt(ox, y+oy);
105     for (int x=0; x<image->w; x++) {
106       *rp++ = shortToFloat[*pix];
107       *gp++ = shortToFloat[*(pix+1)];
108       *bp++ = shortToFloat[*(pix+2)];
109       pix += image->pixelsize;
110     }
111   }
112 }
113 
114 // TODO: Begs to be SSE2 and/or SMP. Scalar int<->float is incredibly slow.
packInterleaved(RS_IMAGE16 * image)115 void FloatPlanarImage::packInterleaved( RS_IMAGE16* image )
116 {
117   for (int i = 0; i < nPlanes; i++) {
118     g_assert(p[i]->w == image->w+ox*2);
119     g_assert(p[i]->h == image->h+oy*2);
120   }
121 
122   for (int y = 0; y < image->h; y++ ) {
123     for (int c = 0; c<nPlanes; c++) {
124       gfloat * in = p[c]->getAt(ox, y+oy);
125       gushort* out = GET_PIXEL(image,0,y) + c;
126       for (int x=0; x<image->w; x++) {
127         float fp = *(in++);
128         int p = (int)(fp*fp);
129         *out = clampbits(p,16);
130         out += image->pixelsize;
131       }
132     }
133   }
134 }
135 
getUnpackInterleavedYUVJobs(RS_IMAGE16 * image)136 JobQueue* FloatPlanarImage::getUnpackInterleavedYUVJobs(RS_IMAGE16* image) {
137   // Already demosaiced
138   JobQueue* queue = new JobQueue();
139 
140   if (image->channels != 3)
141     return queue;
142 
143   g_assert(p == 0);
144   nPlanes = 3;
145   p = new FloatImagePlane*[nPlanes];
146 
147   for (int i = 0; i < nPlanes; i++)
148     p[i] = new FloatImagePlane(image->w+ox*2, image->h+oy*2, i);
149 
150   allocate_planes();
151   int threads = rs_get_number_of_processor_cores()*4;
152   int hEvery = MAX(1,(image->h+threads)/threads);
153   for (int i = 0; i < threads; i++) {
154     ImgConvertJob *j = new ImgConvertJob(this,JOB_CONVERT_TOFLOAT_YUV);
155     j->start_y = i*hEvery;
156     j->end_y = MIN((i+1)*hEvery,image->h);
157     j->rs = image;
158     queue->addJob(j);
159   }
160   return queue;
161 }
162 
163 
unpackInterleavedYUV(const ImgConvertJob * j)164 void FloatPlanarImage::unpackInterleavedYUV( const ImgConvertJob* j )
165 {
166   RS_IMAGE16* image = j->rs;
167 
168   // We cannot allow red/blue to become negative, since we need to square root it for gamma correction
169   redCorrection = MAX(0.0f, redCorrection);
170   blueCorrection = MAX(0.0f, blueCorrection);
171 
172 #if defined (__x86_64__)
173   if (image->pixelsize == 4)
174     return unpackInterleavedYUV_SSE2(j);
175 #endif
176 
177   // We cannot look up more than 65535*4
178   redCorrection = MIN( 4.0f, redCorrection);
179   blueCorrection = MIN( 4.0f, blueCorrection);
180 
181   gint redc = (gint)(8192 * redCorrection + 0.5);
182   gint bluec = (gint)(8192 * blueCorrection + 0.5);
183 
184   for (int y = j->start_y; y < j->end_y; y++ ) {
185     const gushort* pix = GET_PIXEL(image,0,y);
186     gfloat *Y = p[0]->getAt(ox, y+oy);
187     gfloat *Cb = p[1]->getAt(ox, y+oy);
188     gfloat *Cr = p[2]->getAt(ox, y+oy);
189     for (int x=0; x<image->w; x++) {
190       float r = shortToFloat[((*pix)*redc)>>13];
191       float g = shortToFloat[(*(pix+1))];
192       float b = shortToFloat[((*(pix+2))*bluec)>>13];
193       *Y++ = r * 0.299 + g * 0.587 + b * 0.114 ;
194       *Cb++ = r * -0.169 + g * -0.331 + b * 0.499;
195       *Cr++ = r * 0.499 + g * -0.418 + b * -0.0813;
196       pix += image->pixelsize;
197     }
198   }
199 }
200 
getPackInterleavedYUVJobs(RS_IMAGE16 * image)201 JobQueue* FloatPlanarImage::getPackInterleavedYUVJobs(RS_IMAGE16* image) {
202   JobQueue* queue = new JobQueue();
203 
204   if (image->channels != 3)
205     return queue;
206 
207   for (int i = 0; i < nPlanes; i++) {
208     g_assert(p[i]->w == image->w+ox*2);
209     g_assert(p[i]->h == image->h+oy*2);
210   }
211 
212   int threads = rs_get_number_of_processor_cores()*4;
213   int hEvery = MAX(1,(image->h+threads)/threads);
214   for (int i = 0; i < threads; i++) {
215     ImgConvertJob *j = new ImgConvertJob(this,JOB_CONVERT_FROMFLOAT_YUV);
216     j->start_y = i*hEvery;
217     j->end_y = MIN((i+1)*hEvery,image->h);
218     j->rs = image;
219     queue->addJob(j);
220   }
221   return queue;
222 }
223 
packInterleavedYUV(const ImgConvertJob * j)224 void FloatPlanarImage::packInterleavedYUV( const ImgConvertJob* j)
225 {
226   RS_IMAGE16* image = j->rs;
227   guint cpu = rs_detect_cpu_features();
228 #if defined (__x86_64__)
229   if ((image->pixelsize == 4) && (cpu & RS_CPU_FLAG_SSE4_1))  {
230     // TODO: Test on SSE4 capable machine before enabling.
231 //    packInterleavedYUV_SSE4(j);
232 //    return;
233   }
234 #endif
235 #if defined (__i386__) || defined (__x86_64__)
236   if ((image->pixelsize == 4) && (cpu & RS_CPU_FLAG_SSE2))  {
237     packInterleavedYUV_SSE2(j);
238     return;
239   }
240 #endif
241   gfloat r_factor = (1.0f/redCorrection);
242   gfloat b_factor = (1.0f/blueCorrection);
243   for (int y = j->start_y; y < j->end_y; y++ ) {
244     gfloat *Y = p[0]->getAt(ox, y+oy);
245     gfloat *Cb = p[1]->getAt(ox, y+oy);
246     gfloat *Cr = p[2]->getAt(ox, y+oy);
247     gushort* out = GET_PIXEL(image,0,y);
248     for (int x=0; x<image->w; x++) {
249       float fr = (Y[x] + 1.402 * Cr[x]);
250       float fg = Y[x] - 0.344 * Cb[x] - 0.714 * Cr[x];
251       float fb = (Y[x] + 1.772 * Cb[x]) ;
252       int r = (int)(fr*fr* r_factor);
253       int g = (int)(fg*fg);
254       int b = (int)(fb*fb* b_factor);
255       out[0] = clampbits(r,16);
256       out[1] = clampbits(g,16);
257       out[2] = clampbits(b,16);
258       out += image->pixelsize;
259     }
260   }
261 }
262 
263 
getJobs(FloatPlanarImage & outImg)264 JobQueue* FloatPlanarImage::getJobs(FloatPlanarImage &outImg) {
265   JobQueue *jobs = new JobQueue();
266 
267   for (int i = 0; i < nPlanes; i++)
268     p[i]->addJobs(jobs, bw, bh, ox, oy, outImg.p[i]);
269 
270   return jobs;
271 }
272 
getPlaneSliceFrom(int plane,int x,int y)273 FloatImagePlane* FloatPlanarImage::getPlaneSliceFrom( int plane, int x, int y )
274 {
275   g_assert(plane>=0 && plane<nPlanes);
276   return p[plane]->getSlice(x,y,ox,oy);
277 }
278 
initConvTable()279 void FloatPlanarImage::initConvTable() {
280   for (int i = 0; i < 65536*4; i++) {
281     shortToFloat[i] = sqrt((float)i);
282   }
283 }
284 
285 }}// namespace RawStudio::FFTFilter
286 
287