1 // This software is in the public domain. Where that dedication is not
2 // recognized, you are granted a perpetual, irrevocable license to copy
3 // and modify this file as you see fit.
4
5 #include "ebsynth.h"
6 #include "jzq.h"
7
8 #include <cmath>
9 #include <cfloat>
10 #include <cstring>
11
12 #ifdef __APPLE__
13 #include <dispatch/dispatch.h>
14 #else
15 #include <omp.h>
16 #endif
17
// Visits every cell of the 2-D container A in row-major order: Y is the outer
// (row) loop variable and X the inner (column) one, both declared as `int`.
// A only needs width() and height(). It is parenthesised so that expressions
// such as `*ptr` expand correctly; note that A is evaluated on every iteration.
#define FOR(A,X,Y) for(int Y=0;Y<(A).height();Y++) for(int X=0;X<(A).width();X++)
19
nnfInit(const V2i & sizeA,const V2i & sizeB,const int patchWidth)20 A2V2i nnfInit(const V2i& sizeA,
21 const V2i& sizeB,
22 const int patchWidth)
23 {
24 A2V2i NNF(sizeA);
25
26 for(int xy=0;xy<NNF.numel();xy++)
27 {
28 NNF[xy] = V2i(patchWidth+rand()%(sizeB(0)-2*patchWidth),
29 patchWidth+rand()%(sizeB(1)-2*patchWidth));
30 }
31
32 return NNF;
33 }
34
35 template<typename FUNC>
nnfError(const A2V2i & NNF,const int patchWidth,FUNC patchError)36 A2f nnfError(const A2V2i& NNF,
37 const int patchWidth,
38 FUNC patchError)
39 {
40 A2f E(size(NNF));
41
42 #pragma omp parallel for schedule(static)
43 for(int y=0;y<NNF.height();y++)
44 for(int x=0;x<NNF.width();x++)
45 {
46 E(x,y) = patchError(patchWidth,V2i(x,y),NNF(x,y),FLT_MAX);
47 }
48
49 return E;
50 }
51
nnfInitRandom(const V2i & targetSize,const V2i & sourceSize,const int patchSize)52 static A2V2i nnfInitRandom(const V2i& targetSize,
53 const V2i& sourceSize,
54 const int patchSize)
55 {
56 A2V2i NNF(targetSize);
57 const int r = patchSize/2;
58
59 for (int i = 0; i < NNF.numel(); i++)
60 {
61 NNF[i] = V2i
62 (
63 r+(rand()%(sourceSize[0]-2*r)),
64 r+(rand()%(sourceSize[1]-2*r))
65 );
66 }
67
68 return NNF;
69 }
70
// Produces a field with the dimensions of targetSize from the coarser field
// NNF. Entry (x,y) of the result is taken from coarse entry (x/2,y/2) (clamped
// to the coarse field), doubled and offset by (x%2,y%2); each component is
// then clamped to [patchSize, sourceSize-patchSize-1].
static A2V2i nnfUpscale(const A2V2i& NNF,
                        const int    patchSize,
                        const V2i&   targetSize,
                        const V2i&   sourceSize)
{
  A2V2i NNF2x(targetSize);

  const int lastCoarseX = NNF.width()-1;
  const int lastCoarseY = NNF.height()-1;

  const int maxSourceX = sourceSize(0)-patchSize-1;
  const int maxSourceY = sourceSize(1)-patchSize-1;

  for(int y=0;y<NNF2x.height();y++)
  {
    for(int x=0;x<NNF2x.width();x++)
    {
      // Parent entry in the coarse field, scaled to the finer resolution.
      const V2i coarse = NNF(clamp(x/2,0,lastCoarseX),
                             clamp(y/2,0,lastCoarseY));
      const V2i nn = coarse*2+V2i(x%2,y%2);

      NNF2x(x,y) = V2i(clamp(nn(0),patchSize,maxSourceX),
                       clamp(nn(1),patchSize,maxSourceY));
    }
  }

  return NNF2x;
}
94
95 template<int N,typename T>
krnlVotePlain(Array2<Vec<N,T>> & target,const Array2<Vec<N,T>> & source,const Array2<Vec<2,int>> & NNF,const int patchSize)96 void krnlVotePlain( Array2<Vec<N,T>>& target,
97 const Array2<Vec<N,T>>& source,
98 const Array2<Vec<2,int>>& NNF,
99 const int patchSize)
100 {
101 for(int y=0;y<target.height();y++)
102 for(int x=0;x<target.width();x++)
103 {
104 const int r = patchSize / 2;
105
106 Vec<N,float> sumColor = zero<Vec<N,float>>::value();
107 float sumWeight = 0;
108
109 for (int py = -r; py <= +r; py++)
110 for (int px = -r; px <= +r; px++)
111 {
112 if
113 (
114 x+px >= 0 && x+px < NNF.width () &&
115 y+py >= 0 && y+py < NNF.height()
116 )
117 {
118 const V2i n = NNF(x+px,y+py)-V2i(px,py);
119
120 if
121 (
122 n[0] >= 0 && n[0] < source.width () &&
123 n[1] >= 0 && n[1] < source.height()
124 )
125 {
126 const float weight = 1.0f;
127 sumColor += weight*Vec<N,float>(source(n(0),n(1)));
128 sumWeight += weight;
129 }
130 }
131 }
132
133 const Vec<N,T> v = Vec<N,T>(sumColor/sumWeight);
134 target(x,y) = v;
135 }
136 }
137
138 #if 0
139 template<int N, typename T, int M>
140 __global__ void krnlVoteWeighted( TexArray2<N,T,M> target,
141 const TexArray2<N,T,M> source,
142 const TexArray2<2,int> NNF,
143 const TexArray2<1,float> E,
144 const int patchSize)
145 {
146 const int x = blockDim.x*blockIdx.x + threadIdx.x;
147 const int y = blockDim.y*blockIdx.y + threadIdx.y;
148
149 if (x<target.width && y<target.height)
150 {
151 const int r = patchSize / 2;
152
153 Vec<N,float> sumColor = zero<Vec<N,float>>::value();
154 float sumWeight = 0;
155
156 for (int py = -r; py <= +r; py++)
157 for (int px = -r; px <= +r; px++)
158 {
159 /*
160 if
161 (
162 x+px >= 0 && x+px < NNF.width () &&
163 y+py >= 0 && y+py < NNF.height()
164 )
165 */
166 {
167 const V2i n = NNF(x+px,y+py)-V2i(px,py);
168
169 /*if
170 (
171 n[0] >= 0 && n[0] < S.width () &&
172 n[1] >= 0 && n[1] < S.height()
173 )*/
174 {
175 const float error = E(x+px,y+py)(0)/(patchSize*patchSize*N);
176 const float weight = 1.0f/(1.0f+error);
177 sumColor += weight*Vec<N,float>(source(n(0),n(1)));
178 sumWeight += weight;
179 }
180 }
181 }
182
183 const Vec<N,T> v = Vec<N,T>(sumColor/sumWeight);
184 target.write(x,y,v);
185 }
186 }
187 #endif
188
189 template<int N,typename T>
sampleBilinear(const Array2<Vec<N,T>> & I,float x,float y)190 Vec<N,T> sampleBilinear(const Array2<Vec<N,T>>& I,float x,float y)
191 {
192 const int ix = x;
193 const int iy = y;
194
195 const float s = x-ix;
196 const float t = y-iy;
197
198 return Vec<N,T>((1.0f-s)*(1.0f-t)*Vec<N,float>(I(clamp(ix ,0,I.width()-1),clamp(iy ,0,I.height()-1)))+
199 ( s)*(1.0f-t)*Vec<N,float>(I(clamp(ix+1,0,I.width()-1),clamp(iy ,0,I.height()-1)))+
200 (1.0f-s)*( t)*Vec<N,float>(I(clamp(ix ,0,I.width()-1),clamp(iy+1,0,I.height()-1)))+
201 ( s)*( t)*Vec<N,float>(I(clamp(ix+1,0,I.width()-1),clamp(iy+1,0,I.height()-1))));
202 };
203
204 /*
205 template<int N, typename T, int M>
206 __global__ void krnlEvalMask( TexArray2<1,unsigned char> mask,
207 const TexArray2<N,T,M> style,
208 const TexArray2<N,T,M> style2,
209 const int stopThreshold)
210 {
211 const int x = blockDim.x*blockIdx.x + threadIdx.x;
212 const int y = blockDim.y*blockIdx.y + threadIdx.y;
213
214 if (x<mask.width && y<mask.height)
215 {
216 const Vec<N,T> s = style(x,y);
217 const Vec<N,T> s2 = style2(x,y);
218
219 int maxDiff = 0;
220 for(int c=0;c<N;c++)
221 {
222 const int diff = std::abs(int(s[c])-int(s2[c]));
223 maxDiff = diff>maxDiff ? diff:maxDiff;
224 }
225
226 const Vec<1,unsigned char> msk = maxDiff < stopThreshold ? Vec<1,unsigned char>(0) : Vec<1,unsigned char>(255);
227
228 mask.write(x,y,msk);
229 }
230 }
231
232 __global__ void krnlDilateMask(TexArray2<1,unsigned char> mask2,
233 const TexArray2<1,unsigned char> mask,
234 const int patchSize)
235 {
236 const int x = blockDim.x*blockIdx.x + threadIdx.x;
237 const int y = blockDim.y*blockIdx.y + threadIdx.y;
238
239 if (x<mask.width && y<mask.height)
240 {
241 const int r = patchSize / 2;
242
243 Vec<1,unsigned char> msk = Vec<1,unsigned char>(0);
244
245 for (int py = -r; py <= +r; py++)
246 for (int px = -r; px <= +r; px++)
247 {
248 if (mask(x+px,y+py)[0]==255) { msk = Vec<1,unsigned char>(255); }
249 }
250
251 mask2.write(x,y,msk);
252 }
253 }
254 */
255
256 template<int N,typename T>
resampleCPU(Array2<Vec<N,T>> & O,const Array2<Vec<N,T>> & I)257 void resampleCPU( Array2<Vec<N,T>>& O,
258 const Array2<Vec<N,T>>& I)
259 {
260 const float s = float(I.width())/float(O.width());
261
262 for(int y=0;y<O.height();y++)
263 for(int x=0;x<O.width();x++)
264 {
265 O(x,y) = sampleBilinear(I,s*float(x),s*float(y));
266 }
267 }
268
269 template<int NS,int NG,typename T>
270 struct PatchSSD_Split
271 {
272 const Array2<Vec<NS,T>>& targetStyle;
273 const Array2<Vec<NS,T>>& sourceStyle;
274
275 const Array2<Vec<NG,T>>& targetGuide;
276 const Array2<Vec<NG,T>>& sourceGuide;
277
278 const Vec<NS,float>& styleWeights;
279 const Vec<NG,float>& guideWeights;
280
PatchSSD_SplitPatchSSD_Split281 PatchSSD_Split(const Array2<Vec<NS,T>>& targetStyle,
282 const Array2<Vec<NS,T>>& sourceStyle,
283
284 const Array2<Vec<NG,T>>& targetGuide,
285 const Array2<Vec<NG,T>>& sourceGuide,
286
287 const Vec<NS,float>& styleWeights,
288 const Vec<NG,float>& guideWeights)
289
290 : targetStyle(targetStyle),sourceStyle(sourceStyle),
291 targetGuide(targetGuide),sourceGuide(sourceGuide),
292 styleWeights(styleWeights),guideWeights(guideWeights) {}
293
operator ()PatchSSD_Split294 float operator()(const int patchSize,
295 const V2i txy,
296 const V2i sxy,
297 const float ebest)
298 {
299 const int tx = txy(0);
300 const int ty = txy(1);
301 const int sx = sxy(0);
302 const int sy = sxy(1);
303
304 const int r = patchSize/2;
305 float error = 0;
306
307 if(tx-r>=0 && tx+r<targetStyle.width() &&
308 ty-r>=0 && ty+r<targetStyle.height())
309 {
310 const T* ptrTs = (T*)&targetStyle(tx-r,ty-r);
311 const T* ptrSs = (T*)&sourceStyle(sx-r,sy-r);
312 const T* ptrTg = (T*)&targetGuide(tx-r,ty-r);
313 const T* ptrSg = (T*)&sourceGuide(sx-r,sy-r);
314 const int ofsTs = (targetStyle.width()-patchSize)*NS;
315 const int ofsSs = (sourceStyle.width()-patchSize)*NS;
316 const int ofsTg = (targetGuide.width()-patchSize)*NG;
317 const int ofsSg = (sourceGuide.width()-patchSize)*NG;
318 for(int j=0;j<patchSize;j++)
319 {
320 for(int i=0;i<patchSize;i++)
321 {
322 for(int k=0;k<NS;k++)
323 {
324 const float diff = *ptrTs - *ptrSs;
325 error += styleWeights[k]*diff*diff;
326 ptrTs++;
327 ptrSs++;
328 }
329 for(int k=0;k<NG;k++)
330 {
331 const float diff = *ptrTg - *ptrSg;
332 error += guideWeights[k]*diff*diff;
333 ptrTg++;
334 ptrSg++;
335 }
336 }
337 ptrTs += ofsTs;
338 ptrSs += ofsSs;
339 ptrTg += ofsTg;
340 ptrSg += ofsSg;
341 if(error>ebest) { break; }
342 }
343 }
344 else
345 {
346 for(int py=-r;py<=+r;py++)
347 for(int px=-r;px<=+r;px++)
348 {
349 {
350 const Vec<NS,T> pixTs = targetStyle(clamp(tx + px,0,targetStyle.width()-1),clamp(ty + py,0,targetStyle.height()-1));
351 const Vec<NS,T> pixSs = sourceStyle(clamp(sx + px,0,sourceStyle.width()-1),clamp(sy + py,0,sourceStyle.height()-1));
352 for(int i=0;i<NS;i++)
353 {
354 const float diff = float(pixTs[i]) - float(pixSs[i]);
355 error += styleWeights[i]*diff*diff;
356 }
357 }
358
359 {
360 const Vec<NG,T> pixTg = targetGuide(clamp(tx + px,0,targetGuide.width()-1),clamp(ty + py,0,targetGuide.height()-1));
361 const Vec<NG,T> pixSg = sourceGuide(clamp(sx + px,0,sourceGuide.width()-1),clamp(sy + py,0,sourceGuide.height()-1));
362 for(int i=0;i<NG;i++)
363 {
364 const float diff = float(pixTg[i]) - float(pixSg[i]);
365 error += guideWeights[i]*diff*diff;
366 }
367 }
368 }
369 }
370
371 return error;
372 }
373 };
374
375 /*
376 template<int NS,int NG,typename T>
377 struct PatchSSD_Split_Modulation
378 {
379 const TexArray2<NS,T> targetStyle;
380 const TexArray2<NS,T> sourceStyle;
381
382 const TexArray2<NG,T> targetGuide;
383 const TexArray2<NG,T> sourceGuide;
384
385 const TexArray2<NG,T> targetModulation;
386
387 const Vec<NS,float> styleWeights;
388 const Vec<NG,float> guideWeights;
389
390 PatchSSD_Split_Modulation(const TexArray2<NS,T>& targetStyle,
391 const TexArray2<NS,T>& sourceStyle,
392
393 const TexArray2<NG,T>& targetGuide,
394 const TexArray2<NG,T>& sourceGuide,
395
396 const TexArray2<NG,T>& targetModulation,
397
398 const Vec<NS,float>& styleWeights,
399 const Vec<NG,float>& guideWeights)
400
401 : targetStyle(targetStyle),sourceStyle(sourceStyle),
402 targetGuide(targetGuide),sourceGuide(sourceGuide),
403 targetModulation(targetModulation),
404 styleWeights(styleWeights),guideWeights(guideWeights) {}
405
406 __device__ float operator()(const int patchSize,
407 const int tx,
408 const int ty,
409 const int sx,
410 const int sy,
411 const float ebest)
412 {
413 const int r = patchSize/2;
414 float error = 0;
415
416 for(int py=-r;py<=+r;py++)
417 {
418 for(int px=-r;px<=+r;px++)
419 {
420 {
421 const Vec<NS,T> pixTs = targetStyle(tx + px,ty + py);
422 const Vec<NS,T> pixSs = sourceStyle(sx + px,sy + py);
423 for(int i=0;i<NS;i++)
424 {
425 const float diff = float(pixTs[i]) - float(pixSs[i]);
426 error += styleWeights[i]*diff*diff;
427 }
428 }
429
430 {
431 const Vec<NG,T> pixTg = targetGuide(tx + px,ty + py);
432 const Vec<NG,T> pixSg = sourceGuide(sx + px,sy + py);
433 const Vec<NG,float> mult = Vec<NG,float>(targetModulation(tx,ty))/255.0f;
434
435 for(int i=0;i<NG;i++)
436 {
437 const float diff = float(pixTg[i]) - float(pixSg[i]);
438 error += guideWeights[i]*mult[i]*diff*diff;
439 }
440 }
441 }
442
443 if (error>ebest) { return error; }
444 }
445
446 return error;
447 }
448 };
449 */
450
// Returns the image size at pyramid level `level`: sizeBase scaled by
// 2^-(numLevels-1-level) and converted back to integers, so that level
// numLevels-1 has the full base size and each lower level halves it.
static V2i pyramidLevelSize(const V2i& sizeBase,const int numLevels,const int level)
{
  const int   halvings = numLevels-1-level;
  const float scale    = std::pow(2.0f,-float(halvings));

  return V2i(V2f(sizeBase)*scale);
}
455
456 template<typename T>
copy(Array2<T> * out_dst,void * src)457 void copy(Array2<T>* out_dst,void* src)
458 {
459 Array2<T>& dst = *out_dst;
460 memcpy(dst.data(),src,numel(dst)*sizeof(T));
461 }
462
463 template<typename T>
copy(void ** out_dst,const Array2<T> & src)464 void copy(void** out_dst,const Array2<T>& src)
465 {
466 void*& dst = *out_dst;
467 memcpy(dst,src.data(),numel(src)*sizeof(T));
468 }
469
updateOmega(A2i & Omega,const V2i & sizeA,const int patchWidth,const V2i & axy,const V2i & bxy,const int incdec)470 void updateOmega(A2i& Omega,const V2i& sizeA,const int patchWidth,const V2i& axy,const V2i& bxy,const int incdec)
471 {
472 const int r = patchWidth/2;
473
474 int* ptr = (int*)&Omega(bxy(0)-r,bxy(1)-r);
475 const int ofs = (Omega.width()-patchWidth);
476
477 for(int j=0;j<patchWidth;j++)
478 {
479 for(int i=0;i<patchWidth;i++)
480 {
481 *ptr += incdec;
482 ptr++;
483 }
484 ptr += ofs;
485 }
486 }
487
patchOmega(const int patchWidth,const V2i & bxy,const A2i & Omega)488 static int patchOmega(const int patchWidth,const V2i& bxy,const A2i& Omega)
489 {
490 const int r = patchWidth/2;
491
492 int sum = 0;
493
494 const int* ptr = (int*)&Omega(bxy(0)-r,bxy(1)-r);
495 const int ofs = (Omega.width()-patchWidth);
496
497 for(int j=0;j<patchWidth;j++)
498 {
499 for(int i=0;i<patchWidth;i++)
500 {
501 sum += (*ptr);
502 ptr++;
503 }
504 ptr += ofs;
505 }
506
507 return sum;
508 }
509
510 template<typename FUNC>
tryPatch(FUNC patchError,const V2i & sizeA,int patchWidth,const V2i & axy,const V2i & bxy,A2V2i & N,A2f & E,A2i & Omega,float omegaBest,float lambda)511 bool tryPatch(FUNC patchError,const V2i& sizeA,int patchWidth,const V2i& axy,const V2i& bxy,A2V2i& N,A2f& E,A2i& Omega,float omegaBest,float lambda)
512 {
513 const float curOcc = (float(patchOmega(patchWidth,N(axy),Omega))/float(patchWidth*patchWidth))/omegaBest;
514 const float newOcc = (float(patchOmega(patchWidth, bxy,Omega))/float(patchWidth*patchWidth))/omegaBest;
515
516 const float curErr = E(axy);
517 const float newErr = patchError(patchWidth,axy,bxy,curErr+lambda*curOcc);
518
519 if ((newErr+lambda*newOcc) < (curErr+lambda*curOcc))
520 {
521 updateOmega(Omega,sizeA,patchWidth,axy,bxy ,+1);
522 updateOmega(Omega,sizeA,patchWidth,axy,N(axy),-1);
523 N(axy) = bxy;
524 E(axy) = newErr;
525 }
526
527 return true;
528 }
529
530 template<typename FUNC>
patchmatch(const V2i & sizeA,const V2i & sizeB,const int patchWidth,FUNC patchError,const float lambda,const int numIters,const int numThreads,A2V2i & N,A2f & E,A2i & Omega)531 void patchmatch(const V2i& sizeA,
532 const V2i& sizeB,
533 const int patchWidth,
534 FUNC patchError,
535 const float lambda,
536 const int numIters,
537 const int numThreads,
538 A2V2i& N,
539 A2f& E,
540 A2i& Omega)
541 {
542 const int w = patchWidth;
543
544 E = nnfError(N,patchWidth,patchError);
545
546 const float sra = 0.5f;
547
548 std::vector<int> irad;
549
550 irad.push_back((sizeB(0) > sizeB(1) ? sizeB(0) : sizeB(1)));
551
552 while (irad.back() != 1) irad.push_back(int(std::pow(sra, int(irad.size())) * irad[0]));
553
554 const int nir = int(irad.size());
555
556 #ifdef __APPLE__
557 dispatch_queue_t gcdq = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH,0);
558 const int numThreads_ = 8;
559 #else
560 const int numThreads_ = numThreads<1 ? omp_get_max_threads() : numThreads;
561 #endif
562
563 const int minTileHeight = 8;
564 const int numTiles = int(ceil(float(sizeA(1))/float(numThreads_))) > minTileHeight ? numThreads_ : std::max(int(ceil(float(sizeA(1))/float(minTileHeight))),1);
565 const int tileHeight = sizeA(1)/numTiles;
566
567 const float omegaBest = (float(sizeA(0)*sizeA(1)) /
568 float(sizeB(0)*sizeB(1))) * float(patchWidth*patchWidth);
569
570 fill(&Omega,(int)0);
571 for(int y=0;y<sizeA(1);y++)
572 for(int x=0;x<sizeA(0);x++)
573 {
574 updateOmega(Omega,sizeA,w,V2i(x,y),N(x,y),+1);
575 }
576
577 for (int iter = 0; iter < numIters; iter++)
578 {
579 const int iter_seed = rand();
580
581 #ifdef __APPLE__
582 dispatch_apply(numTiles,gcdq,^(size_t blockIdx)
583 #else
584 #pragma omp parallel num_threads(numTiles)
585 #endif
586 {
587 const bool odd = (iter%2 == 0);
588
589 #ifdef __APPLE__
590 const int threadId = blockIdx;
591 #else
592 const int threadId = omp_get_thread_num();
593 #endif
594
595 const int _y0 = threadId*tileHeight;
596 const int _y1 = threadId==numTiles-1 ? sizeA(1) : std::min(_y0+tileHeight,sizeA(1));
597
598 const int q = odd ? 1 : -1;
599 const int x0 = odd ? 0 : sizeA(0)-1;
600 const int y0 = odd ? _y0 : _y1-1;
601 const int x1 = odd ? sizeA(0) : -1;
602 const int y1 = odd ? _y1 : _y0-1;
603
604 for (int y = y0; y != y1; y += q)
605 for (int x = x0; x != x1; x += q)
606 {
607 if (odd ? (x > 0) : (x < sizeA(0)-1))
608 {
609 V2i n = N(x-q,y); n[0] += q;
610
611 if (odd ? (n[0] < sizeB(0)-w/2) : (n[0] >= w/2))
612 {
613 tryPatch(patchError,sizeA,w,V2i(x,y),n,N,E,Omega,omegaBest,lambda);
614 }
615 }
616
617 if (odd ? (y > 0) : (y <sizeA(1)-1))
618 {
619 V2i n = N(x,y-q); n[1] += q;
620
621 if (odd ? (n[1] < sizeB(1)-w/2) : (n[1] >= w/2))
622 {
623 tryPatch(patchError,sizeA,w,V2i(x,y),n,N,E,Omega,omegaBest,lambda);
624 }
625 }
626
627 #define RANDI(u) (18000 * ((u) & 65535) + ((u) >> 16))
628
629 unsigned int seed = (x | (y<<11)) ^ iter_seed;
630 seed = RANDI(seed);
631
632 const V2i pix0 = N(x,y);
633 //for (int i = 0; i < nir; i++)
634 for (int i = nir-1; i >=0; i--)
635 {
636 V2i tl = pix0 - V2i(irad[i], irad[i]);
637 V2i br = pix0 + V2i(irad[i], irad[i]);
638
639 tl = std::max(tl,V2i(w/2,w/2));
640 br = std::min(br,sizeB-V2i(w/2,w/2));
641
642 const int _rndX = RANDI(seed);
643 const int _rndY = RANDI(_rndX);
644 seed=_rndY;
645
646 const V2i n = V2i
647 (
648 tl[0] + (_rndX % (br[0]-tl[0])),
649 tl[1] + (_rndY % (br[1]-tl[1]))
650 );
651
652 tryPatch(patchError,sizeA,w,V2i(x,y),n,N,E,Omega,omegaBest,lambda);
653 }
654
655 #undef RANDI
656 }
657 }
658 #ifdef __APPLE__
659 );
660 #endif
661 }
662 }
663
664 template<int NS,int NG>
ebsynthCpu(int numStyleChannels,int numGuideChannels,int sourceWidth,int sourceHeight,void * sourceStyleData,void * sourceGuideData,int targetWidth,int targetHeight,void * targetGuideData,void * targetModulationData,float * styleWeights,float * guideWeights,float uniformityWeight,int patchSize,int voteMode,int numPyramidLevels,int * numSearchVoteItersPerLevel,int * numPatchMatchItersPerLevel,int * stopThresholdPerLevel,int extraPass3x3,void * outputNnfData,void * outputImageData)665 void ebsynthCpu(int numStyleChannels,
666 int numGuideChannels,
667 int sourceWidth,
668 int sourceHeight,
669 void* sourceStyleData,
670 void* sourceGuideData,
671 int targetWidth,
672 int targetHeight,
673 void* targetGuideData,
674 void* targetModulationData,
675 float* styleWeights,
676 float* guideWeights,
677 float uniformityWeight,
678 int patchSize,
679 int voteMode,
680 int numPyramidLevels,
681 int* numSearchVoteItersPerLevel,
682 int* numPatchMatchItersPerLevel,
683 int* stopThresholdPerLevel,
684 int extraPass3x3,
685 void* outputNnfData,
686 void* outputImageData)
687 {
688 const int levelCount = numPyramidLevels;
689
690 struct PyramidLevel
691 {
692 PyramidLevel() { }
693
694 int sourceWidth;
695 int sourceHeight;
696 int targetWidth;
697 int targetHeight;
698
699 Array2<Vec<NS,unsigned char>> sourceStyle;
700 Array2<Vec<NG,unsigned char>> sourceGuide;
701 Array2<Vec<NS,unsigned char>> targetStyle;
702 Array2<Vec<NS,unsigned char>> targetStyle2;
703 //Array2<unsigned char> mask;
704 //Array2<unsigned char> mask2;
705 Array2<Vec<NG,unsigned char>> targetGuide;
706 Array2<Vec<NG,unsigned char>> targetModulation;
707 Array2<Vec<2,int>> NNF;
708 //Array2<Vec<2,int>> NNF2;
709 Array2<float> E;
710 Array2<int> Omega;
711 };
712
713 std::vector<PyramidLevel> pyramid(levelCount);
714 for(int level=0;level<levelCount;level++)
715 {
716 const V2i levelSourceSize = pyramidLevelSize(V2i(sourceWidth,sourceHeight),levelCount,level);
717 const V2i levelTargetSize = pyramidLevelSize(V2i(targetWidth,targetHeight),levelCount,level);
718
719 pyramid[level].sourceWidth = levelSourceSize(0);
720 pyramid[level].sourceHeight = levelSourceSize(1);
721 pyramid[level].targetWidth = levelTargetSize(0);
722 pyramid[level].targetHeight = levelTargetSize(1);
723 }
724
725 pyramid[levelCount-1].sourceStyle = Array2<Vec<NS,unsigned char>>(V2i(pyramid[levelCount-1].sourceWidth,pyramid[levelCount-1].sourceHeight));
726 pyramid[levelCount-1].sourceGuide = Array2<Vec<NG,unsigned char>>(V2i(pyramid[levelCount-1].sourceWidth,pyramid[levelCount-1].sourceHeight));
727 pyramid[levelCount-1].targetGuide = Array2<Vec<NG,unsigned char>>(V2i(pyramid[levelCount-1].targetWidth,pyramid[levelCount-1].targetHeight));
728
729 copy(&pyramid[levelCount-1].sourceStyle,sourceStyleData);
730 copy(&pyramid[levelCount-1].sourceGuide,sourceGuideData);
731 copy(&pyramid[levelCount-1].targetGuide,targetGuideData);
732
733 if (targetModulationData)
734 {
735 pyramid[levelCount-1].targetModulation = Array2<Vec<NG,unsigned char>>(V2i(pyramid[levelCount-1].targetWidth,pyramid[levelCount-1].targetHeight));
736 copy(&pyramid[levelCount-1].targetModulation,targetModulationData);
737 }
738
739 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
740
741 bool inExtraPass = false;
742
743 for (int level=0;level<pyramid.size();level++)
744 {
745 if (!inExtraPass)
746 {
747 const V2i levelSourceSize = V2i(pyramid[level].sourceWidth,pyramid[level].sourceHeight);
748 const V2i levelTargetSize = V2i(pyramid[level].targetWidth,pyramid[level].targetHeight);
749
750 pyramid[level].targetStyle = Array2<Vec<NS,unsigned char>>(levelTargetSize);
751 pyramid[level].targetStyle2 = Array2<Vec<NS,unsigned char>>(levelTargetSize);
752 //pyramid[level].mask = Array2<unsigned char>(levelTargetSize);
753 //pyramid[level].mask2 = Array2<unsigned char>(levelTargetSize);
754 pyramid[level].NNF = Array2<Vec<2,int>>(levelTargetSize);
755 //pyramid[level].NNF2 = Array2<Vec<2,int>>(levelTargetSize);
756 pyramid[level].Omega = Array2<int>(levelSourceSize);
757 pyramid[level].E = Array2<float>(levelTargetSize);
758
759 if (level<levelCount-1)
760 {
761 pyramid[level].sourceStyle = Array2<Vec<NS,unsigned char>>(levelSourceSize);
762 pyramid[level].sourceGuide = Array2<Vec<NG,unsigned char>>(levelSourceSize);
763 pyramid[level].targetGuide = Array2<Vec<NG,unsigned char>>(levelTargetSize);
764
765 resampleCPU(pyramid[level].sourceStyle,pyramid[levelCount-1].sourceStyle);
766 resampleCPU(pyramid[level].sourceGuide,pyramid[levelCount-1].sourceGuide);
767 resampleCPU(pyramid[level].targetGuide,pyramid[levelCount-1].targetGuide);
768
769 if (targetModulationData)
770 {
771 resampleCPU(pyramid[level].targetModulation,pyramid[levelCount-1].targetModulation);
772 pyramid[level].targetModulation = Array2<Vec<NG,unsigned char>>(levelTargetSize);
773 }
774 }
775
776 A2V2i cpu_NNF;
777 if (level>0)
778 {
779 pyramid[level].NNF = nnfUpscale(pyramid[level-1].NNF,
780 patchSize,
781 V2i(pyramid[level].targetWidth,pyramid[level].targetHeight),
782 V2i(pyramid[level].sourceWidth,pyramid[level].sourceHeight));
783
784 pyramid[level-1].NNF = A2V2i();
785 }
786 else
787 {
788 pyramid[level].NNF = nnfInitRandom(V2i(pyramid[level].targetWidth,pyramid[level].targetHeight),
789 V2i(pyramid[level].sourceWidth,pyramid[level].sourceHeight),
790 patchSize);
791 }
792
793 /////////////////////////////////////////////////////////////////////////
794 /*
795 Array2<int> cpu_Omega(pyramid[level].sourceWidth,pyramid[level].sourceHeight);
796
797 fill(&cpu_Omega,(int)0);
798 for(int ay=0;ay<cpu_NNF.height();ay++)
799 for(int ax=0;ax<cpu_NNF.width();ax++)
800 {
801 const V2i& n = cpu_NNF(ax,ay);
802 const int bx = n(0);
803 const int by = n(1);
804
805 const int r = patchSize/2;
806
807 for(int oy=-r;oy<=+r;oy++)
808 for(int ox=-r;ox<=+r;ox++)
809 {
810 const int x = bx+ox;
811 const int y = by+oy;
812 cpu_Omega(x,y) += 1;
813 }
814 }
815
816 copy(&pyramid[level].Omega,cpu_Omega);
817 */
818 /////////////////////////////////////////////////////////////////////////
819 }
820
821 ////////////////////////////////////////////////////////////////////////////
822 {
823 krnlVotePlain(pyramid[level].targetStyle2,
824 pyramid[level].sourceStyle,
825 pyramid[level].NNF,
826 patchSize);
827
828 std::swap(pyramid[level].targetStyle2,pyramid[level].targetStyle);
829 }
830 ////////////////////////////////////////////////////////////////////////////
831
832 //Array2<Vec<1,unsigned char>> cpu_mask(V2i(pyramid[level].targetWidth,pyramid[level].targetHeight));
833 //fill(&cpu_mask,Vec<1,unsigned char>(255));
834 //copy(&pyramid[level].mask,cpu_mask);
835
836 ////////////////////////////////////////////////////////////////////////////
837
838 for (int voteIter=0;voteIter<numSearchVoteItersPerLevel[level];voteIter++)
839 {
840 Vec<NS,float> styleWeightsVec;
841 for(int i=0;i<NS;i++) { styleWeightsVec[i] = styleWeights[i]; }
842
843 Vec<NG,float> guideWeightsVec;
844 for(int i=0;i<NG;i++) { guideWeightsVec[i] = guideWeights[i]; }
845
846 //if (numPatchMatchItersPerLevel[level]>0)
847 {
848 /*if (targetModulationData)
849 {
850 patchmatchGPU(V2i(pyramid[level].targetWidth,pyramid[level].targetHeight),
851 V2i(pyramid[level].sourceWidth,pyramid[level].sourceHeight),
852 pyramid[level].Omega,
853 patchSize,
854 PatchSSD_Split_Modulation<NS,NG,unsigned char>(pyramid[level].targetStyle,
855 pyramid[level].sourceStyle,
856 pyramid[level].targetGuide,
857 pyramid[level].sourceGuide,
858 pyramid[level].targetModulation,
859 styleWeightsVec,
860 guideWeightsVec),
861 uniformityWeight,
862 numPatchMatchItersPerLevel[level],
863 numGpuThreadsPerBlock,
864 pyramid[level].NNF,
865 pyramid[level].NNF2,
866 pyramid[level].E,
867 pyramid[level].mask,
868 rngStates);
869 }
870 else*/
871 {
872 patchmatch(V2i(pyramid[level].targetWidth,pyramid[level].targetHeight),
873 V2i(pyramid[level].sourceWidth,pyramid[level].sourceHeight),
874 patchSize,
875 PatchSSD_Split<NS,NG,unsigned char>(pyramid[level].targetStyle,
876 pyramid[level].sourceStyle,
877 pyramid[level].targetGuide,
878 pyramid[level].sourceGuide,
879 styleWeightsVec,
880 guideWeightsVec),
881 uniformityWeight,
882 numPatchMatchItersPerLevel[level],
883 -1,
884 pyramid[level].NNF,
885 pyramid[level].E,
886 pyramid[level].Omega);
887 }
888 }
889 /*
890 else
891 {
892 if (targetModulationData)
893 {
894 krnlEvalErrorPass<<<numBlocks,threadsPerBlock>>>(patchSize,
895 PatchSSD_Split_Modulation<NS,NG,unsigned char>(pyramid[level].targetStyle,
896 pyramid[level].sourceStyle,
897 pyramid[level].targetGuide,
898 pyramid[level].sourceGuide,
899 pyramid[level].targetModulation,
900 styleWeightsVec,
901 guideWeightsVec),
902 pyramid[level].NNF,
903 pyramid[level].E);
904 }
905 else
906 {
907 krnlEvalErrorPass<<<numBlocks,threadsPerBlock>>>(patchSize,
908 PatchSSD_Split<NS,NG,unsigned char>(pyramid[level].targetStyle,
909 pyramid[level].sourceStyle,
910 pyramid[level].targetGuide,
911 pyramid[level].sourceGuide,
912 styleWeightsVec,
913 guideWeightsVec),
914 pyramid[level].NNF,
915 pyramid[level].E);
916 }
917 checkCudaError( cudaDeviceSynchronize() );
918 }
919 */
920 {
921 //if (voteMode==EBSYNTH_VOTEMODE_PLAIN)
922 {
923 krnlVotePlain(pyramid[level].targetStyle2,
924 pyramid[level].sourceStyle,
925 pyramid[level].NNF,
926 patchSize);
927 }
928 /*else if (voteMode==EBSYNTH_VOTEMODE_WEIGHTED)
929 {
930 krnlVoteWeighted<<<numBlocks,threadsPerBlock>>>(pyramid[level].targetStyle2,
931 pyramid[level].sourceStyle,
932 pyramid[level].NNF,
933 pyramid[level].E,
934 patchSize);
935 }*/
936
937 std::swap(pyramid[level].targetStyle2,pyramid[level].targetStyle);
938
939 /*
940 if (voteIter<numSearchVoteItersPerLevel[level]-1)
941 {
942 krnlEvalMask<<<numBlocks,threadsPerBlock>>>(pyramid[level].mask,
943 pyramid[level].targetStyle,
944 pyramid[level].targetStyle2,
945 stopThresholdPerLevel[level]);
946 checkCudaError( cudaDeviceSynchronize() );
947
948 krnlDilateMask<<<numBlocks,threadsPerBlock>>>(pyramid[level].mask2,
949 pyramid[level].mask,
950 patchSize);
951 std::swap(pyramid[level].mask2,pyramid[level].mask);
952 checkCudaError( cudaDeviceSynchronize() );
953 }
954 */
955 }
956 }
957
958 if (level==levelCount-1 && (extraPass3x3==0 || (extraPass3x3!=0 && inExtraPass)))
959 {
960 if (outputNnfData!=NULL) { copy(&outputNnfData,pyramid[level].NNF); }
961 copy(&outputImageData,pyramid[level].targetStyle);
962 }
963
964 if ((level<levelCount-1) ||
965 (extraPass3x3==0) ||
966 (extraPass3x3!=0 && inExtraPass))
967 {
968 pyramid[level].sourceStyle = Array2<Vec<NS,unsigned char>>();
969 pyramid[level].sourceGuide = Array2<Vec<NG,unsigned char>>();
970 pyramid[level].targetGuide = Array2<Vec<NG,unsigned char>>();
971 pyramid[level].targetStyle = Array2<Vec<NS,unsigned char>>();
972 pyramid[level].targetStyle2 = Array2<Vec<NS,unsigned char>>();
973 //pyramid[level].mask = Array2<unsigned char>();
974 //pyramid[level].mask2 = Array2<unsigned char>();
975 //pyramid[level].NNF2 = Array2<Vec<2,int>>();
976 pyramid[level].Omega = Array2<int>();
977 pyramid[level].E = Array2<float>();
978 if (targetModulationData) { pyramid[level].targetModulation = Array2<Vec<NG,unsigned char>>(); }
979 }
980
981 if (level==levelCount-1 && (extraPass3x3!=0) && !inExtraPass)
982 {
983 inExtraPass = true;
984 level--;
985 patchSize = 3;
986 uniformityWeight = 0;
987 }
988 }
989
990 pyramid[levelCount-1].NNF = Array2<Vec<2,int>>();
991 }
992
ebsynthRunCpu(int numStyleChannels,int numGuideChannels,int sourceWidth,int sourceHeight,void * sourceStyleData,void * sourceGuideData,int targetWidth,int targetHeight,void * targetGuideData,void * targetModulationData,float * styleWeights,float * guideWeights,float uniformityWeight,int patchSize,int voteMode,int numPyramidLevels,int * numSearchVoteItersPerLevel,int * numPatchMatchItersPerLevel,int * stopThresholdPerLevel,int extraPass3x3,void * outputNnfData,void * outputImageData)993 void ebsynthRunCpu(int numStyleChannels,
994 int numGuideChannels,
995 int sourceWidth,
996 int sourceHeight,
997 void* sourceStyleData,
998 void* sourceGuideData,
999 int targetWidth,
1000 int targetHeight,
1001 void* targetGuideData,
1002 void* targetModulationData,
1003 float* styleWeights,
1004 float* guideWeights,
1005 float uniformityWeight,
1006 int patchSize,
1007 int voteMode,
1008 int numPyramidLevels,
1009 int* numSearchVoteItersPerLevel,
1010 int* numPatchMatchItersPerLevel,
1011 int* stopThresholdPerLevel,
1012 int extraPass3x3,
1013 void* outputNnfData,
1014 void* outputImageData)
1015 {
1016 void (*const dispatchEbsynth[EBSYNTH_MAX_GUIDE_CHANNELS][EBSYNTH_MAX_STYLE_CHANNELS])(int,int,int,int,void*,void*,int,int,void*,void*,float*,float*,float,int,int,int,int*,int*,int*,int,void*,void*) =
1017 {
1018 { ebsynthCpu<1, 1>, ebsynthCpu<2, 1>, ebsynthCpu<3, 1>, ebsynthCpu<4, 1>, ebsynthCpu<5, 1>, ebsynthCpu<6, 1>, ebsynthCpu<7, 1>, ebsynthCpu<8, 1> },
1019 { ebsynthCpu<1, 2>, ebsynthCpu<2, 2>, ebsynthCpu<3, 2>, ebsynthCpu<4, 2>, ebsynthCpu<5, 2>, ebsynthCpu<6, 2>, ebsynthCpu<7, 2>, ebsynthCpu<8, 2> },
1020 { ebsynthCpu<1, 3>, ebsynthCpu<2, 3>, ebsynthCpu<3, 3>, ebsynthCpu<4, 3>, ebsynthCpu<5, 3>, ebsynthCpu<6, 3>, ebsynthCpu<7, 3>, ebsynthCpu<8, 3> },
1021 { ebsynthCpu<1, 4>, ebsynthCpu<2, 4>, ebsynthCpu<3, 4>, ebsynthCpu<4, 4>, ebsynthCpu<5, 4>, ebsynthCpu<6, 4>, ebsynthCpu<7, 4>, ebsynthCpu<8, 4> },
1022 { ebsynthCpu<1, 5>, ebsynthCpu<2, 5>, ebsynthCpu<3, 5>, ebsynthCpu<4, 5>, ebsynthCpu<5, 5>, ebsynthCpu<6, 5>, ebsynthCpu<7, 5>, ebsynthCpu<8, 5> },
1023 { ebsynthCpu<1, 6>, ebsynthCpu<2, 6>, ebsynthCpu<3, 6>, ebsynthCpu<4, 6>, ebsynthCpu<5, 6>, ebsynthCpu<6, 6>, ebsynthCpu<7, 6>, ebsynthCpu<8, 6> },
1024 { ebsynthCpu<1, 7>, ebsynthCpu<2, 7>, ebsynthCpu<3, 7>, ebsynthCpu<4, 7>, ebsynthCpu<5, 7>, ebsynthCpu<6, 7>, ebsynthCpu<7, 7>, ebsynthCpu<8, 7> },
1025 { ebsynthCpu<1, 8>, ebsynthCpu<2, 8>, ebsynthCpu<3, 8>, ebsynthCpu<4, 8>, ebsynthCpu<5, 8>, ebsynthCpu<6, 8>, ebsynthCpu<7, 8>, ebsynthCpu<8, 8> },
1026 { ebsynthCpu<1, 9>, ebsynthCpu<2, 9>, ebsynthCpu<3, 9>, ebsynthCpu<4, 9>, ebsynthCpu<5, 9>, ebsynthCpu<6, 9>, ebsynthCpu<7, 9>, ebsynthCpu<8, 9> },
1027 { ebsynthCpu<1,10>, ebsynthCpu<2,10>, ebsynthCpu<3,10>, ebsynthCpu<4,10>, ebsynthCpu<5,10>, ebsynthCpu<6,10>, ebsynthCpu<7,10>, ebsynthCpu<8,10> },
1028 { ebsynthCpu<1,11>, ebsynthCpu<2,11>, ebsynthCpu<3,11>, ebsynthCpu<4,11>, ebsynthCpu<5,11>, ebsynthCpu<6,11>, ebsynthCpu<7,11>, ebsynthCpu<8,11> },
1029 { ebsynthCpu<1,12>, ebsynthCpu<2,12>, ebsynthCpu<3,12>, ebsynthCpu<4,12>, ebsynthCpu<5,12>, ebsynthCpu<6,12>, ebsynthCpu<7,12>, ebsynthCpu<8,12> },
1030 { ebsynthCpu<1,13>, ebsynthCpu<2,13>, ebsynthCpu<3,13>, ebsynthCpu<4,13>, ebsynthCpu<5,13>, ebsynthCpu<6,13>, ebsynthCpu<7,13>, ebsynthCpu<8,13> },
1031 { ebsynthCpu<1,14>, ebsynthCpu<2,14>, ebsynthCpu<3,14>, ebsynthCpu<4,14>, ebsynthCpu<5,14>, ebsynthCpu<6,14>, ebsynthCpu<7,14>, ebsynthCpu<8,14> },
1032 { ebsynthCpu<1,15>, ebsynthCpu<2,15>, ebsynthCpu<3,15>, ebsynthCpu<4,15>, ebsynthCpu<5,15>, ebsynthCpu<6,15>, ebsynthCpu<7,15>, ebsynthCpu<8,15> },
1033 { ebsynthCpu<1,16>, ebsynthCpu<2,16>, ebsynthCpu<3,16>, ebsynthCpu<4,16>, ebsynthCpu<5,16>, ebsynthCpu<6,16>, ebsynthCpu<7,16>, ebsynthCpu<8,16> },
1034 { ebsynthCpu<1,17>, ebsynthCpu<2,17>, ebsynthCpu<3,17>, ebsynthCpu<4,17>, ebsynthCpu<5,17>, ebsynthCpu<6,17>, ebsynthCpu<7,17>, ebsynthCpu<8,17> },
1035 { ebsynthCpu<1,18>, ebsynthCpu<2,18>, ebsynthCpu<3,18>, ebsynthCpu<4,18>, ebsynthCpu<5,18>, ebsynthCpu<6,18>, ebsynthCpu<7,18>, ebsynthCpu<8,18> },
1036 { ebsynthCpu<1,19>, ebsynthCpu<2,19>, ebsynthCpu<3,19>, ebsynthCpu<4,19>, ebsynthCpu<5,19>, ebsynthCpu<6,19>, ebsynthCpu<7,19>, ebsynthCpu<8,19> },
1037 { ebsynthCpu<1,20>, ebsynthCpu<2,20>, ebsynthCpu<3,20>, ebsynthCpu<4,20>, ebsynthCpu<5,20>, ebsynthCpu<6,20>, ebsynthCpu<7,20>, ebsynthCpu<8,20> },
1038 { ebsynthCpu<1,21>, ebsynthCpu<2,21>, ebsynthCpu<3,21>, ebsynthCpu<4,21>, ebsynthCpu<5,21>, ebsynthCpu<6,21>, ebsynthCpu<7,21>, ebsynthCpu<8,21> },
1039 { ebsynthCpu<1,22>, ebsynthCpu<2,22>, ebsynthCpu<3,22>, ebsynthCpu<4,22>, ebsynthCpu<5,22>, ebsynthCpu<6,22>, ebsynthCpu<7,22>, ebsynthCpu<8,22> },
1040 { ebsynthCpu<1,23>, ebsynthCpu<2,23>, ebsynthCpu<3,23>, ebsynthCpu<4,23>, ebsynthCpu<5,23>, ebsynthCpu<6,23>, ebsynthCpu<7,23>, ebsynthCpu<8,23> },
1041 { ebsynthCpu<1,24>, ebsynthCpu<2,24>, ebsynthCpu<3,24>, ebsynthCpu<4,24>, ebsynthCpu<5,24>, ebsynthCpu<6,24>, ebsynthCpu<7,24>, ebsynthCpu<8,24> }
1042 };
1043
1044 if (numStyleChannels>=1 && numStyleChannels<=EBSYNTH_MAX_STYLE_CHANNELS &&
1045 numGuideChannels>=1 && numGuideChannels<=EBSYNTH_MAX_GUIDE_CHANNELS)
1046 {
1047 dispatchEbsynth[numGuideChannels-1][numStyleChannels-1](numStyleChannels,
1048 numGuideChannels,
1049 sourceWidth,
1050 sourceHeight,
1051 sourceStyleData,
1052 sourceGuideData,
1053 targetWidth,
1054 targetHeight,
1055 targetGuideData,
1056 targetModulationData,
1057 styleWeights,
1058 guideWeights,
1059 uniformityWeight,
1060 patchSize,
1061 voteMode,
1062 numPyramidLevels,
1063 numSearchVoteItersPerLevel,
1064 numPatchMatchItersPerLevel,
1065 stopThresholdPerLevel,
1066 extraPass3x3,
1067 outputNnfData,
1068 outputImageData);
1069 }
1070 }
1071
// Availability query for the CPU backend; this implementation returns 1
// unconditionally.
int ebsynthBackendAvailableCpu()
{
  const int available = 1;
  return available;
}
1076