1 /* 2 */ 3 4 /* 5 6 Copyright (C) 2014 Ferrero Andrea 7 8 This program is free software: you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation, either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program. If not, see <http://www.gnu.org/licenses/>. 20 21 22 */ 23 24 /* 25 26 These files are distributed with PhotoFlow - http://aferrero2707.github.io/PhotoFlow/ 27 28 */ 29 30 #ifndef PF_NLMEANS_H 31 #define PF_NLMEANS_H 32 33 #include <string.h> 34 35 #include "../base/processor.hh" 36 37 namespace PF 38 { 39 40 class NonLocalMeansPar: public OpParBase 41 { 42 Property<float> radius, strength, luma_frac, chroma_frac; 43 44 int P, K; 45 float sharpness; 46 47 ProcessorBase* convert2lab; 48 ProcessorBase* convert2input; 49 ProcessorBase* nlmeans_algo; 50 51 PF::ICCProfile* in_profile; 52 53 public: 54 NonLocalMeansPar(); 55 has_intensity()56 bool has_intensity() { return false; } needs_caching()57 bool needs_caching() { return false; } 58 set_radius(float r)59 void set_radius( float r ) { radius.set(r); } set_strength(float s)60 void set_strength( float s ) { strength.set(s); } set_luma_frac(float f)61 void set_luma_frac( float f ) { luma_frac.set(f); } set_chroma_frac(float f)62 void set_chroma_frac( float f ) { chroma_frac.set(f); } 63 64 get_P()65 int get_P() {return P; } get_K()66 int get_K() { return K; } get_sharpness()67 float get_sharpness() { return sharpness; } get_luma_frac()68 float get_luma_frac() { return luma_frac.get(); } get_chroma_frac()69 float get_chroma_frac() { return chroma_frac.get(); } 70 get_padding()71 int get_padding() { return P+K; } 72 73 VipsImage* build(std::vector<VipsImage*>& in, int first, 74 VipsImage* imap, VipsImage* omap, 75 unsigned int& level); 76 }; 77 78 79 80 class NonLocalMeans_DTAlgo_Par: public OpParBase 81 { 82 int P, K; 83 float sharpness, luma_frac, chroma_frac; 84 85 public: NonLocalMeans_DTAlgo_Par()86 NonLocalMeans_DTAlgo_Par(): OpParBase() {set_type("nlmeans_DT");} 87 has_intensity()88 bool has_intensity() { return false; } needs_caching()89 bool needs_caching() { return false; } 90 get_P()91 int get_P() {return P; } set_P(int val)92 void set_P(int val) { P = val; } get_K()93 int get_K() { return K; } set_K(int val)94 void set_K(int val) { K = val; } get_sharpness()95 float get_sharpness() { return sharpness; } set_sharpness(float val)96 void set_sharpness(float val) { sharpness = val; } get_luma_frac()97 float get_luma_frac() { return luma_frac; } set_luma_frac(float val)98 void set_luma_frac(float val) { luma_frac = val; } get_chroma_frac()99 float get_chroma_frac() { return chroma_frac; } set_chroma_frac(float val)100 void set_chroma_frac(float val) { chroma_frac = val; } 101 get_padding()102 int get_padding() { return P+K; } 103 104 /* Function to derive the output area from the input area 105 */ transform(const VipsRect * rin,VipsRect * rout,int)106 virtual void transform(const VipsRect* rin, VipsRect* rout, int /*id*/) 107 { 108 int pad = get_padding(); 109 rout->left = rin->left+pad; 110 rout->top = rin->top+pad; 111 rout->width = rin->width-pad*2; 112 rout->height = rin->height-pad*2; 113 } 114 115 /* Function to derive the area to be read from input images, 116 based on the requested output area 117 */ transform_inv(const VipsRect * rout,VipsRect * rin,int)118 virtual void transform_inv(const VipsRect* rout, VipsRect* rin, int /*id*/) 119 { 120 int pad = get_padding(); 121 rin->left = rout->left-pad; 122 rin->top = rout->top-pad; 123 rin->width = rout->width+pad*2; 124 rin->height = rout->height+pad*2; 125 } 126 }; 127 128 129 130 template < OP_TEMPLATE_DEF > 131 class NonLocalMeansProc 132 { 133 public: render(VipsRegion ** in,int n,int in_first,VipsRegion * imap,VipsRegion * omap,VipsRegion * out,OpParBase * par)134 void render(VipsRegion** in, int n, int in_first, 135 VipsRegion* imap, VipsRegion* omap, 136 VipsRegion* out, OpParBase* par) 137 { 138 std::cout<<"NonLocalMeansProc::render() called."<<std::endl; 139 } 140 }; 141 142 143 typedef union floatint_t 144 { 145 float f; 146 uint32_t i; 147 } floatint_t; 148 149 /* Non-local means filter, adapted from Darktable 150 */ 151 template < OP_TEMPLATE_DEF > 152 class NonLocalMeans_DTAlgo_Proc 153 { fast_mexp2f(const float x)154 float fast_mexp2f(const float x) 155 { 156 const float i1 = (float)0x3f800000u; // 2^0 157 const float i2 = (float)0x3f000000u; // 2^-1 158 const float k0 = i1 + x * (i2 - i1); 159 floatint_t k; 160 k.i = k0 >= (float)0x800000u ? k0 : 0; 161 return k.f; 162 } 163 gh(const float f,const float sharpness)164 float gh(const float f, const float sharpness) 165 { 166 const float f2 = f * sharpness; 167 return fast_mexp2f(f2); 168 // return 0.0001f + dt_fast_expf(-fabsf(f)*800.0f); 169 // return 1.0f/(1.0f + f*f); 170 // make spread bigger: less smoothing 171 // const float spread = 100.f; 172 // return 1.0f/(1.0f + fabsf(f)*spread); 173 } 174 175 176 public: render(VipsRegion ** ireg,int n,int in_first,VipsRegion * imap,VipsRegion * omap,VipsRegion * oreg,OpParBase * par)177 void render(VipsRegion** ireg, int n, int in_first, 178 VipsRegion* imap, VipsRegion* omap, 179 VipsRegion* oreg, OpParBase* par) 180 { 181 //std::cout<<"NonLocalMeansProc::render(RGB) called: n="<<n<<std::endl; 182 if( n < 1 ) return; 183 if( ireg[0] == NULL ) {std::cout<<"NonLocalMeansProc::render(RGB): ireg[0]==NULL"<<std::endl;return;} 184 185 //int iwidth = ireg[0]->valid.width; 186 //int iheight = ireg[0]->valid.height; 187 188 //The cleaning algorithm starts here 189 190 NonLocalMeans_DTAlgo_Par* opar = dynamic_cast<NonLocalMeans_DTAlgo_Par*>(par); 191 if( !opar ) { 192 std::cout<<"NonLocalMeansProc::render(RGB): opar==NULL"<<std::endl; 193 return; 194 } 195 196 VipsRect *ir = &ireg[0]->valid; 197 VipsRect roi = {ir->left+2, ir->top+2, ir->width-4, ir->height-4}; 198 VipsRect *r = &oreg->valid; 199 vips_rect_intersectrect( &roi, r, &roi ); 200 int nch = oreg->im->Bands; 201 int line_size = roi.width * oreg->im->Bands; 202 int width = ir->width; 203 int height = ir->height; 204 int dx = r->left - ir->left; 205 int dy = r->top - ir->top; 206 int padding = r->top - ir->top; 207 208 //std::cout<<"NonLocalMeansProc::render(RGB):"<<std::endl 209 // <<"ir: "<<ir->width<<","<<ir->height<<"+"<<ir->left<<"+"<<ir->top<<std::endl 210 // <<"r: "<<r->width<<","<<r->height<<"+"<<r->left<<"+"<<r->top<<std::endl; 211 212 //T* pin0; 213 ///T* pin1; 214 //T* pin2; 215 //T* pin20; 216 //T* pin21; 217 //T* pin22; 218 //T* pout; 219 //int x, y, oy, x1, y1, pos, ipos, pos1; 220 221 // adjust to zoom size: 222 const int P = opar->get_P(); //ceilf(d->radius * fmin(roi_in->scale, 2.0f) / fmax(piece->iscale, 1.0f)); // pixel filter size 223 const int K = opar->get_K(); //ceilf(7 * fmin(roi_in->scale, 2.0f) / fmax(piece->iscale, 1.0f)); // nbhood 224 const float sharpness = opar->get_sharpness(); //3000.0f / (1.0f + d->strength); 225 float luma = opar->get_luma_frac(); 226 float chroma = opar->get_chroma_frac(); 227 228 //int padding = opar->get_padding(); 229 230 // adjust to Lab, make L more important 231 // float max_L = 100.0f, max_C = 256.0f; 232 // float nL = 1.0f/(d->luma*max_L), nC = 1.0f/(d->chroma*max_C); 233 //float max_L = 120.0f, max_C = 512.0f; 234 float max_L = 1.2f, max_C = 2.0f; 235 float nL = 1.0f / max_L, nC = 1.0f / max_C; 236 const float norm2[4] = { nL * nL, nC * nC, nC * nC, 1.0f }; 237 //const float norm2[4] = { 1.0f, 1.0f, .0f, 1.0f }; 238 239 /* 240 { 241 for(int j = 0; j < 10; j++) { 242 T* pin = (T*)VIPS_REGION_ADDR( ireg[0], ir->left, ir->top+j ); 243 for( int i = 0; i < 10*nch; i+=nch ) printf("%f ", pin[i]*100); printf("\n"); 244 } 245 } 246 */ 247 248 float *Sa = (float*)malloc( sizeof(float) * width); 249 float *out_norm = (float*)malloc( sizeof(float) * width * height); 250 memset(out_norm, 0x0, sizeof(float) * width * height); 251 252 // we want to sum up weights in col[3], so need to init to 0: 253 for( int y = 0; y < r->height; y++ ) { 254 T* pout = (T*)VIPS_REGION_ADDR( oreg, r->left, r->top + y ); 255 memset( pout, 0x0, VIPS_REGION_SIZEOF_LINE(oreg) ); 256 } 257 //memset(ovoid, 0x0, (size_t)sizeof(float) * roi_out->width * roi_out->height * 4); 258 259 // for each shift vector 260 for(int kj = -K; kj <= K; kj++) { 261 for(int ki = -K; ki <= K; ki++) { 262 int inited_slide = 0; 263 // don't construct summed area tables but use sliding window! (applies to cpu version res < 1k only, or else 264 // we will add up errors) 265 // do this in parallel with a little threading overhead. could parallelize the outer loops with a bit more 266 // memory 267 //#ifdef _OPENMP 268 //#pragma omp parallel for schedule(static) default(none) firstprivate(inited_slide) shared(kj, ki, Sa) 269 //#endif 270 for(int j = 0; j < height; j++) { 271 if(j + kj < 0 || j + kj >= height) continue; 272 float *S = Sa; 273 //const float *ins = ((float *)ivoid) + 4 * ((size_t)roi_in->width * (j + kj) + ki); 274 const T *ins = (T*)VIPS_REGION_ADDR( ireg[0], ir->left + ki, ir->top + j + kj ); 275 //float *out = ((float *)ovoid) + 4 * (size_t)roi_out->width * j; 276 277 const int Pm = MIN(MIN(P, j + kj), j); 278 const int PM = MIN(MIN(P, height - 1 - j - kj), height - 1 - j); 279 // first line of every thread 280 // TODO: also every once in a while to assert numerical precision! 281 if(!inited_slide) { 282 // sum up a line 283 memset(Sa, 0x0, sizeof(float) * width); 284 for(int jj = -Pm; jj <= PM; jj++) { 285 int i = MAX(0, -ki); 286 float *s = S + i; 287 //const float *inp = ((float *)ivoid) + 4 * i + 4 * (size_t)roi_in->width * (j + jj); 288 const T* inp = (T*)VIPS_REGION_ADDR( ireg[0], ir->left + i, ir->top + j + jj ); 289 //printf("inp: left=%d top=%d\n", ir->left + i, ir->top + j + jj); 290 //const float *inps = ((float *)ivoid) + 4 * i + 4 * ((size_t)roi_in->width * (j + jj + kj) + ki); 291 const T* inps = (T*)VIPS_REGION_ADDR( ireg[0], ir->left + i + ki, ir->top + j + jj + kj); 292 //printf("inps: left=%d top=%d\n", ir->left + i + ki, ir->top + j + jj + kj); 293 const int last = width + MIN(0, -ki); 294 for(; i < last; i++, inp += nch, inps += nch, s++) 295 { 296 for(int k = 0; k < nch; k++) { 297 //float temp = (inp[k] - inps[k]) * (inp[k] - inps[k]) * norm2[k]; 298 s[0] += (inp[k] - inps[k]) * (inp[k] - inps[k]) * norm2[k]; 299 if( false && /*j < 10 && i < 10 &&*/ k==0) 300 printf(" kj=%d ki=%d j=%d Pm=%d PM=%d jj=%d i=%d inp[%d]=%f inps[%d]=%f s[0]+=%f\n", 301 kj, ki, j, Pm, PM, jj, i, k, (float)inp[k]*100, k, (float)inps[k]*100, 302 ((inp[k] - inps[k]) * (inp[k] - inps[k]) * norm2[k])); 303 } 304 } 305 if( false && /*j< 10*/ true ) 306 printf(" kj=%d ki=%d j=%d Pm=%d PM=%d jj=%d i=%d last=%d s[0]=%f\n", 307 kj, ki, j, Pm, PM, jj, i, last, s[0]); 308 } 309 // only reuse this if we had a full stripe 310 if(Pm == P && PM == P) inited_slide = 1; 311 } 312 //continue; 313 //std::cout<<"inited_slide="<<inited_slide<<std::endl; 314 315 // sliding window for this line: 316 T* out = (T*)VIPS_REGION_ADDR( oreg, ir->left, ir->top + j ); 317 float* on = out_norm + width*j; 318 float *s = S; 319 float slide = 0.0f; 320 // sum up the first -P..P 321 for(int i = 0; i < 2 * P + 1; i++) slide += s[i]; 322 for(int i = 0; i < width; i++, s++, ins += nch, out += nch, on += 1) { 323 if(i - P > 0 && i + P < width) 324 slide += s[P] - s[-P - 1]; 325 if(i >= padding && i < width - padding && i + ki >= 0 && i + ki < width && j >= padding && j < height - padding) { 326 const float iv[4] = { static_cast<float>(ins[0]), static_cast<float>(ins[1]), static_cast<float>(ins[2]), 1.0f }; 327 //#if defined(_OPENMP) && defined(OPENMP_SIMD_) 328 //#pragma omp SIMD() 329 //#endif 330 float gh_val = gh(slide, sharpness); 331 for(int c = 0; c < nch; c++) 332 { 333 //printf(" i=%d out[0] = %f + %f * gh(%f, %f) = %f + %f * %f = %f\n", 334 // i, out[c], iv[c], slide, sharpness, 335 // out[c], iv[c], gh_val, 336 // out[c] + iv[c] * gh_val); 337 out[c] += iv[c] * gh_val; 338 } 339 //printf(" i=%d on[0] = %f + %f = %f\n", i, on[0], gh_val, on[0] + gh_val); 340 on[0] += gh_val; 341 } 342 } 343 //continue; 344 if(inited_slide && j + P + 1 + MAX(0, kj) < height) 345 { 346 // sliding window in j direction: 347 int i = MAX(0, -ki); 348 float *s = S + i; 349 //const float *inp = ((float *)ivoid) + 4 * i + 4 * (size_t)roi_in->width * (j + P + 1); 350 const T* inp = (T*)VIPS_REGION_ADDR( ireg[0], ir->left + i, ir->top + j + P + 1 ); 351 //const float *inps = ((float *)ivoid) + 4 * i + 4 * ((size_t)roi_in->width * (j + P + 1 + kj) + ki); 352 const T* inps = (T*)VIPS_REGION_ADDR( ireg[0], ir->left + i + ki, ir->top + j + P + 1 + kj ); 353 //const float *inm = ((float *)ivoid) + 4 * i + 4 * (size_t)roi_in->width * (j - P); 354 const T* inm = (T*)VIPS_REGION_ADDR( ireg[0], ir->left + i, ir->top + j - P ); 355 //const float *inms = ((float *)ivoid) + 4 * i + 4 * ((size_t)roi_in->width * (j - P + kj) + ki); 356 const T* inms = (T*)VIPS_REGION_ADDR( ireg[0], ir->left + i + ki, ir->top + j - P + kj ); 357 const int last = width + MIN(0, -ki); 358 for(; i < last; i++, inp += nch, inps += nch, inm += nch, inms += nch, s++) 359 { 360 float stmp = s[0]; 361 for(int k = 0; k < nch; k++) 362 stmp += ((inp[k] - inps[k]) * (inp[k] - inps[k]) - (inm[k] - inms[k]) * (inm[k] - inms[k])) * norm2[k]; 363 s[0] = stmp; 364 } 365 } 366 else 367 inited_slide = 0; 368 } 369 } 370 } 371 372 // normalize and apply chroma/luma blending 373 const float weight[4] = { luma, chroma, chroma, 1.0f }; 374 const float invert[4] = { 1.0f - luma, 1.0f - chroma, 1.0f - chroma, 0.0f }; 375 376 for(int j = 0; j < r->height; j++) { 377 T* in = ( (T*)VIPS_REGION_ADDR(ireg[0], r->left, r->top + j) ); 378 T* out = ( (T*)VIPS_REGION_ADDR(oreg, r->left, r->top + j) ); 379 float* on = out_norm + (j+padding) * width + padding; 380 for(int i = 0; i < r->width; i++, in+=nch, out+=nch, on+=1) { 381 for(int c = 0; c < nch; c++) { 382 //printf("j=%d i=%d out[c]=%f * %f / %f = %f\n", j, i, out[c], weight[c], on[0], out[c] * weight[c] / on[0]); 383 out[c] = in[c] * invert[c] + out[c] * weight[c] / on[0]; 384 } 385 //out[1] = out[2] = 0.5; 386 } 387 } 388 389 //#ifdef _OPENMP 390 //#pragma omp parallel for SIMD() default(none) schedule(static) collapse(2) 391 //#endif 392 // for(size_t k = 0; k < (size_t)ch * roi_out->width * roi_out->height; k += ch) 393 // { 394 // for(size_t c = 0; c < 4; c++) 395 // { 396 // out[k + c] = (in[k + c] * invert[c]) + (out[k + c] * (weight[c] / out[k + 3])); 397 // } 398 // } 399 400 // free shared tmp memory: 401 free(Sa); 402 free(out_norm); 403 } 404 }; 405 406 407 ProcessorBase* new_nlmeans_algo(); 408 409 ProcessorBase* new_nlmeans(); 410 411 } 412 413 #endif 414 415 416