1 /*
2 * * Copyright (C) 2006-2011 Anders Brander <anders@brander.dk>,
3 * * Anders Kvist <akv@lnxbx.dk> and Klaus Post <klauspost@gmail.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 */
19
20 #include "floatplanarimage.h"
21 #include "complexfilter.h"
22 #include <math.h>
23
24 namespace RawStudio {
25 namespace FFTFilter {
26
27 float FloatPlanarImage::shortToFloat[65536*4] = {0};
28
FloatPlanarImage(void)29 FloatPlanarImage::FloatPlanarImage(void) {
30 p = 0;
31 redCorrection = blueCorrection = 1.0f;
32 nPlanes = 0;
33 }
34
FloatPlanarImage(const FloatPlanarImage & img)35 FloatPlanarImage::FloatPlanarImage( const FloatPlanarImage &img )
36 {
37 nPlanes = img.nPlanes;
38 p = new FloatImagePlane*[nPlanes];
39 for (int i = 0; i < nPlanes; i++)
40 p[i] = new FloatImagePlane(img.p[i]->w, img.p[i]->h, i);
41
42 bw = img.bw;
43 bh = img.bh;
44 ox = img.ox;
45 oy = img.oy;
46
47 redCorrection = img.redCorrection;
48 blueCorrection = img.blueCorrection;
49 }
50
~FloatPlanarImage(void)51 FloatPlanarImage::~FloatPlanarImage(void) {
52 if (p != 0) {
53 for (int i = 0; i < nPlanes; i++) {
54 if (p[i])
55 delete p[i];
56 p[i] = 0;
57 }
58 delete[] p;
59 p = 0;
60 }
61 p = 0;
62 nPlanes = 0;
63 }
64
allocate_planes()65 void FloatPlanarImage::allocate_planes() {
66 for (int i = 0; i < nPlanes; i++)
67 p[i]->allocateImage();
68 }
69
mirrorEdges()70 void FloatPlanarImage::mirrorEdges()
71 {
72 for (int i = 0; i < nPlanes; i++)
73 p[i]->mirrorEdges(ox, oy);
74 }
75
setFilter(int plane,ComplexFilter * f,FFTWindow * window)76 void FloatPlanarImage::setFilter( int plane, ComplexFilter *f, FFTWindow *window )
77 {
78 if (plane >= nPlanes)
79 return;
80 p[plane]->filter = f;
81 p[plane]->window = window;
82 }
83
84 // TODO: Begs to be SSE2 and/or SMP.
unpackInterleaved(const RS_IMAGE16 * image)85 void FloatPlanarImage::unpackInterleaved( const RS_IMAGE16* image )
86 {
87 // Already demosaiced
88 if (image->channels != 3)
89 return;
90
91 nPlanes = 3;
92 g_assert(p == 0);
93 p = new FloatImagePlane*[nPlanes];
94
95 for (int i = 0; i < nPlanes; i++)
96 p[i] = new FloatImagePlane(image->w+ox*2, image->h+oy*2, i);
97
98 allocate_planes();
99
100 for (int y = 0; y < image->h; y++ ) {
101 const gushort* pix = GET_PIXEL(image,0,y);
102 gfloat *rp = p[0]->getAt(ox, y+oy);
103 gfloat *gp = p[1]->getAt(ox, y+oy);
104 gfloat *bp = p[2]->getAt(ox, y+oy);
105 for (int x=0; x<image->w; x++) {
106 *rp++ = shortToFloat[*pix];
107 *gp++ = shortToFloat[*(pix+1)];
108 *bp++ = shortToFloat[*(pix+2)];
109 pix += image->pixelsize;
110 }
111 }
112 }
113
114 // TODO: Begs to be SSE2 and/or SMP. Scalar int<->float is incredibly slow.
packInterleaved(RS_IMAGE16 * image)115 void FloatPlanarImage::packInterleaved( RS_IMAGE16* image )
116 {
117 for (int i = 0; i < nPlanes; i++) {
118 g_assert(p[i]->w == image->w+ox*2);
119 g_assert(p[i]->h == image->h+oy*2);
120 }
121
122 for (int y = 0; y < image->h; y++ ) {
123 for (int c = 0; c<nPlanes; c++) {
124 gfloat * in = p[c]->getAt(ox, y+oy);
125 gushort* out = GET_PIXEL(image,0,y) + c;
126 for (int x=0; x<image->w; x++) {
127 float fp = *(in++);
128 int p = (int)(fp*fp);
129 *out = clampbits(p,16);
130 out += image->pixelsize;
131 }
132 }
133 }
134 }
135
getUnpackInterleavedYUVJobs(RS_IMAGE16 * image)136 JobQueue* FloatPlanarImage::getUnpackInterleavedYUVJobs(RS_IMAGE16* image) {
137 // Already demosaiced
138 JobQueue* queue = new JobQueue();
139
140 if (image->channels != 3)
141 return queue;
142
143 g_assert(p == 0);
144 nPlanes = 3;
145 p = new FloatImagePlane*[nPlanes];
146
147 for (int i = 0; i < nPlanes; i++)
148 p[i] = new FloatImagePlane(image->w+ox*2, image->h+oy*2, i);
149
150 allocate_planes();
151 int threads = rs_get_number_of_processor_cores()*4;
152 int hEvery = MAX(1,(image->h+threads)/threads);
153 for (int i = 0; i < threads; i++) {
154 ImgConvertJob *j = new ImgConvertJob(this,JOB_CONVERT_TOFLOAT_YUV);
155 j->start_y = i*hEvery;
156 j->end_y = MIN((i+1)*hEvery,image->h);
157 j->rs = image;
158 queue->addJob(j);
159 }
160 return queue;
161 }
162
163
unpackInterleavedYUV(const ImgConvertJob * j)164 void FloatPlanarImage::unpackInterleavedYUV( const ImgConvertJob* j )
165 {
166 RS_IMAGE16* image = j->rs;
167
168 // We cannot allow red/blue to become negative, since we need to square root it for gamma correction
169 redCorrection = MAX(0.0f, redCorrection);
170 blueCorrection = MAX(0.0f, blueCorrection);
171
172 #if defined (__x86_64__)
173 if (image->pixelsize == 4)
174 return unpackInterleavedYUV_SSE2(j);
175 #endif
176
177 // We cannot look up more than 65535*4
178 redCorrection = MIN( 4.0f, redCorrection);
179 blueCorrection = MIN( 4.0f, blueCorrection);
180
181 gint redc = (gint)(8192 * redCorrection + 0.5);
182 gint bluec = (gint)(8192 * blueCorrection + 0.5);
183
184 for (int y = j->start_y; y < j->end_y; y++ ) {
185 const gushort* pix = GET_PIXEL(image,0,y);
186 gfloat *Y = p[0]->getAt(ox, y+oy);
187 gfloat *Cb = p[1]->getAt(ox, y+oy);
188 gfloat *Cr = p[2]->getAt(ox, y+oy);
189 for (int x=0; x<image->w; x++) {
190 float r = shortToFloat[((*pix)*redc)>>13];
191 float g = shortToFloat[(*(pix+1))];
192 float b = shortToFloat[((*(pix+2))*bluec)>>13];
193 *Y++ = r * 0.299 + g * 0.587 + b * 0.114 ;
194 *Cb++ = r * -0.169 + g * -0.331 + b * 0.499;
195 *Cr++ = r * 0.499 + g * -0.418 + b * -0.0813;
196 pix += image->pixelsize;
197 }
198 }
199 }
200
getPackInterleavedYUVJobs(RS_IMAGE16 * image)201 JobQueue* FloatPlanarImage::getPackInterleavedYUVJobs(RS_IMAGE16* image) {
202 JobQueue* queue = new JobQueue();
203
204 if (image->channels != 3)
205 return queue;
206
207 for (int i = 0; i < nPlanes; i++) {
208 g_assert(p[i]->w == image->w+ox*2);
209 g_assert(p[i]->h == image->h+oy*2);
210 }
211
212 int threads = rs_get_number_of_processor_cores()*4;
213 int hEvery = MAX(1,(image->h+threads)/threads);
214 for (int i = 0; i < threads; i++) {
215 ImgConvertJob *j = new ImgConvertJob(this,JOB_CONVERT_FROMFLOAT_YUV);
216 j->start_y = i*hEvery;
217 j->end_y = MIN((i+1)*hEvery,image->h);
218 j->rs = image;
219 queue->addJob(j);
220 }
221 return queue;
222 }
223
packInterleavedYUV(const ImgConvertJob * j)224 void FloatPlanarImage::packInterleavedYUV( const ImgConvertJob* j)
225 {
226 RS_IMAGE16* image = j->rs;
227 guint cpu = rs_detect_cpu_features();
228 #if defined (__x86_64__)
229 if ((image->pixelsize == 4) && (cpu & RS_CPU_FLAG_SSE4_1)) {
230 // TODO: Test on SSE4 capable machine before enabling.
231 // packInterleavedYUV_SSE4(j);
232 // return;
233 }
234 #endif
235 #if defined (__i386__) || defined (__x86_64__)
236 if ((image->pixelsize == 4) && (cpu & RS_CPU_FLAG_SSE2)) {
237 packInterleavedYUV_SSE2(j);
238 return;
239 }
240 #endif
241 gfloat r_factor = (1.0f/redCorrection);
242 gfloat b_factor = (1.0f/blueCorrection);
243 for (int y = j->start_y; y < j->end_y; y++ ) {
244 gfloat *Y = p[0]->getAt(ox, y+oy);
245 gfloat *Cb = p[1]->getAt(ox, y+oy);
246 gfloat *Cr = p[2]->getAt(ox, y+oy);
247 gushort* out = GET_PIXEL(image,0,y);
248 for (int x=0; x<image->w; x++) {
249 float fr = (Y[x] + 1.402 * Cr[x]);
250 float fg = Y[x] - 0.344 * Cb[x] - 0.714 * Cr[x];
251 float fb = (Y[x] + 1.772 * Cb[x]) ;
252 int r = (int)(fr*fr* r_factor);
253 int g = (int)(fg*fg);
254 int b = (int)(fb*fb* b_factor);
255 out[0] = clampbits(r,16);
256 out[1] = clampbits(g,16);
257 out[2] = clampbits(b,16);
258 out += image->pixelsize;
259 }
260 }
261 }
262
263
getJobs(FloatPlanarImage & outImg)264 JobQueue* FloatPlanarImage::getJobs(FloatPlanarImage &outImg) {
265 JobQueue *jobs = new JobQueue();
266
267 for (int i = 0; i < nPlanes; i++)
268 p[i]->addJobs(jobs, bw, bh, ox, oy, outImg.p[i]);
269
270 return jobs;
271 }
272
getPlaneSliceFrom(int plane,int x,int y)273 FloatImagePlane* FloatPlanarImage::getPlaneSliceFrom( int plane, int x, int y )
274 {
275 g_assert(plane>=0 && plane<nPlanes);
276 return p[plane]->getSlice(x,y,ox,oy);
277 }
278
initConvTable()279 void FloatPlanarImage::initConvTable() {
280 for (int i = 0; i < 65536*4; i++) {
281 shortToFloat[i] = sqrt((float)i);
282 }
283 }
284
285 }}// namespace RawStudio::FFTFilter
286
287