/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/*
//
// This implementation is based on the implementation by Javier Sánchez Pérez <jsanchez@dis.ulpgc.es>.
// Original BSD license:
//
// Copyright (c) 2011, Javier Sánchez Pérez, Enric Meinhardt Llopis
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
//   list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
*/

#include "precomp.hpp"
#include "opencl_kernels_optflow.hpp"

#include <limits>
#include <iomanip>
#include <iostream>
#include "opencv2/core/opencl/ocl_defs.hpp"

namespace cv {
namespace optflow {

class OpticalFlowDual_TVL1 : public DualTVL1OpticalFlow
{
public:

    OpticalFlowDual_TVL1(double tau_, double lambda_, double theta_, int nscales_, int warps_,
                         double epsilon_, int innerIterations_, int outerIterations_,
                         double scaleStep_, double gamma_, int medianFiltering_,
                         bool useInitialFlow_) :
        tau(tau_), lambda(lambda_), theta(theta_), gamma(gamma_), nscales(nscales_),
        warps(warps_), epsilon(epsilon_), innerIterations(innerIterations_),
        outerIterations(outerIterations_), useInitialFlow(useInitialFlow_),
        scaleStep(scaleStep_), medianFiltering(medianFiltering_)
    {
    }
    OpticalFlowDual_TVL1();

    virtual String getDefaultName() const CV_OVERRIDE { return "DenseOpticalFlow.DualTVL1OpticalFlow"; }

    void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE;
    void collectGarbage() CV_OVERRIDE;

    inline double getTau() const CV_OVERRIDE { return tau; }
    inline void setTau(double val) CV_OVERRIDE { tau = val; }
    inline double getLambda() const CV_OVERRIDE { return lambda; }
    inline void setLambda(double val) CV_OVERRIDE { lambda = val; }
    inline double getTheta() const CV_OVERRIDE { return theta; }
    inline void setTheta(double val) CV_OVERRIDE { theta = val; }
    inline double getGamma() const CV_OVERRIDE { return gamma; }
    inline void setGamma(double val) CV_OVERRIDE { gamma = val; }
    inline int getScalesNumber() const CV_OVERRIDE { return nscales; }
    inline void setScalesNumber(int val) CV_OVERRIDE { nscales = val; }
    inline int getWarpingsNumber() const CV_OVERRIDE { return warps; }
    inline void setWarpingsNumber(int val) CV_OVERRIDE { warps = val; }
    inline double getEpsilon() const CV_OVERRIDE { return epsilon; }
    inline void setEpsilon(double val) CV_OVERRIDE { epsilon = val; }
    inline int getInnerIterations() const CV_OVERRIDE { return innerIterations; }
    inline void setInnerIterations(int val) CV_OVERRIDE { innerIterations = val; }
    inline int getOuterIterations() const CV_OVERRIDE { return outerIterations; }
    inline void setOuterIterations(int val) CV_OVERRIDE { outerIterations = val; }
    inline bool getUseInitialFlow() const CV_OVERRIDE { return useInitialFlow; }
    inline void setUseInitialFlow(bool val) CV_OVERRIDE { useInitialFlow = val; }
    inline double getScaleStep() const CV_OVERRIDE { return scaleStep; }
    inline void setScaleStep(double val) CV_OVERRIDE { scaleStep = val; }
    inline int getMedianFiltering() const CV_OVERRIDE { return medianFiltering; }
    inline void setMedianFiltering(int val) CV_OVERRIDE { medianFiltering = val; }

protected:
    double tau;
    double lambda;
    double theta;
    double gamma;
    int nscales;
    int warps;
    double epsilon;
    int innerIterations;
    int outerIterations;
    bool useInitialFlow;
    double scaleStep;
    int medianFiltering;

private:
    void procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3);

#ifdef HAVE_OPENCL
    bool procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2);

    bool calc_ocl(InputArray I0, InputArray I1, InputOutputArray flow);
#endif
    struct dataMat
    {
        std::vector<Mat_<float> > I0s;
        std::vector<Mat_<float> > I1s;
        std::vector<Mat_<float> > u1s;
        std::vector<Mat_<float> > u2s;
        std::vector<Mat_<float> > u3s;

        Mat_<float> I1x_buf;
        Mat_<float> I1y_buf;

        Mat_<float> flowMap1_buf;
        Mat_<float> flowMap2_buf;

        Mat_<float> I1w_buf;
        Mat_<float> I1wx_buf;
        Mat_<float> I1wy_buf;

        Mat_<float> grad_buf;
        Mat_<float> rho_c_buf;

        Mat_<float> v1_buf;
        Mat_<float> v2_buf;
        Mat_<float> v3_buf;

        Mat_<float> p11_buf;
        Mat_<float> p12_buf;
        Mat_<float> p21_buf;
        Mat_<float> p22_buf;
        Mat_<float> p31_buf;
        Mat_<float> p32_buf;

        Mat_<float> div_p1_buf;
        Mat_<float> div_p2_buf;
        Mat_<float> div_p3_buf;

        Mat_<float> u1x_buf;
        Mat_<float> u1y_buf;
        Mat_<float> u2x_buf;
        Mat_<float> u2y_buf;
        Mat_<float> u3x_buf;
        Mat_<float> u3y_buf;
    } dm;

#ifdef HAVE_OPENCL
    struct dataUMat
    {
        std::vector<UMat> I0s;
        std::vector<UMat> I1s;
        std::vector<UMat> u1s;
        std::vector<UMat> u2s;

        UMat I1x_buf;
        UMat I1y_buf;

        UMat I1w_buf;
        UMat I1wx_buf;
        UMat I1wy_buf;

        UMat grad_buf;
        UMat rho_c_buf;

        UMat p11_buf;
        UMat p12_buf;
        UMat p21_buf;
        UMat p22_buf;

        UMat diff_buf;
        UMat norm_buf;
    } dum;
#endif
};

#ifdef HAVE_OPENCL
namespace cv_ocl_tvl1flow
{
    bool centeredGradient(const UMat &src, UMat &dx, UMat &dy);

    bool warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
        UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
        UMat &grad, UMat &rho);

    bool estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
        UMat &rho_c, UMat &p11, UMat &p12,
        UMat &p21, UMat &p22, UMat &u1,
        UMat &u2, UMat &error, float l_t, float theta, char calc_error);

    bool estimateDualVariables(UMat &u1, UMat &u2,
        UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut);
}

bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy)
{
    size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows };

    ocl::Kernel kernel;
    if (!kernel.create("centeredGradientKernel", ocl::optflow::optical_flow_tvl1_oclsrc, ""))
        return false;

    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat
    idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col
    idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows
    idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));//src mat step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy
    idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step
    return kernel.run(2, globalsize, NULL, false);
}

bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
    UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
    UMat &grad, UMat &rho)
{
    size_t globalsize[2] = { (size_t)I0.cols, (size_t)I0.rows };

    ocl::Kernel kernel;
    if (!kernel.create("warpBackwardKernel", ocl::optflow::optical_flow_tvl1_oclsrc, ""))
        return false;

    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat
    int I0_step = (int)(I0.step / I0.elemSize());
    idxArg = kernel.set(idxArg, I0_step);//I0_step
    idxArg = kernel.set(idxArg, (int)(I0.cols));//I0_col
    idxArg = kernel.set(idxArg, (int)(I0.rows));//I0_row
    ocl::Image2D imageI1(I1);
    ocl::Image2D imageI1x(I1x);
    ocl::Image2D imageI1y(I1y);
    idxArg = kernel.set(idxArg, imageI1);//image2d_t tex_I1
    idxArg = kernel.set(idxArg, imageI1x);//image2d_t tex_I1x
    idxArg = kernel.set(idxArg, imageI1y);//image2d_t tex_I1y
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1
    idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho
    idxArg = kernel.set(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step
    idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//u2_step
    int u1_offset_x = (int)((u1.offset) % (u1.step));
    u1_offset_x = (int)(u1_offset_x / u1.elemSize());
    idxArg = kernel.set(idxArg, (int)u1_offset_x );//u1_offset_x
    idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step));//u1_offset_y
    int u2_offset_x = (int)((u2.offset) % (u2.step));
    u2_offset_x = (int) (u2_offset_x / u2.elemSize());
    idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x
    idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y
    return kernel.run(2, globalsize, NULL, false);
}

bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
    UMat &rho_c, UMat &p11, UMat &p12,
    UMat &p21, UMat &p22, UMat &u1,
    UMat &u2, UMat &error, float l_t, float theta, char calc_error)
{
    size_t globalsize[2] = { (size_t)I1wx.cols, (size_t)I1wx.rows };

    ocl::Kernel kernel;
    if (!kernel.create("estimateUKernel", ocl::optflow::optical_flow_tvl1_oclsrc, ""))
        return false;

    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx
    idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col
    idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row
    idxArg = kernel.set(idxArg, (int)(I1wx.step/I1wx.elemSize())); //int I1wx_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1
    idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error
    idxArg = kernel.set(idxArg, (float)l_t); //float l_t
    idxArg = kernel.set(idxArg, (float)theta); //float theta
    idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step
    int u1_offset_x = (int)(u1.offset % u1.step);
    u1_offset_x = (int) (u1_offset_x  / u1.elemSize());
    idxArg = kernel.set(idxArg, (int)u1_offset_x); //int u1_offset_x
    idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step)); //int u1_offset_y
    int u2_offset_x = (int)(u2.offset % u2.step);
    u2_offset_x = (int)(u2_offset_x / u2.elemSize());
    idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x
    idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y
    idxArg = kernel.set(idxArg, (char)calc_error);    //char calc_error

    return kernel.run(2, globalsize, NULL, false);
}

bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2,
    UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut)
{
    size_t globalsize[2] = { (size_t)u1.cols, (size_t)u1.rows };

    ocl::Kernel kernel;
    if (!kernel.create("estimateDualVariablesKernel", ocl::optflow::optical_flow_tvl1_oclsrc, ""))
        return false;

    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1
    idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col
    idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row
    idxArg = kernel.set(idxArg, (int)(u1.step/u1.elemSize())); //int u1_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11
    idxArg = kernel.set(idxArg, (int)(p11.step/p11.elemSize())); //int p11_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22
    idxArg = kernel.set(idxArg, (float)(taut));    //float taut
    idxArg = kernel.set(idxArg, (int)(u2.step/u2.elemSize())); //int u2_step
    int u1_offset_x = (int)(u1.offset % u1.step);
    u1_offset_x = (int)(u1_offset_x / u1.elemSize());
    idxArg = kernel.set(idxArg, u1_offset_x); //int u1_offset_x
    idxArg = kernel.set(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y
    int u2_offset_x = (int)(u2.offset % u2.step);
    u2_offset_x = (int)(u2_offset_x / u2.elemSize());
    idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x
    idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y

    return kernel.run(2, globalsize, NULL, false);

}
#endif

OpticalFlowDual_TVL1::OpticalFlowDual_TVL1()
{
    tau            = 0.25;
    lambda         = 0.15;
    theta          = 0.3;
    nscales        = 5;
    warps          = 5;
    epsilon        = 0.01;
    gamma          = 0.;
    innerIterations = 30;
    outerIterations = 10;
    useInitialFlow = false;
    medianFiltering = 5;
    scaleStep      = 0.8;
}

void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow)
{
    CV_INSTRUMENT_REGION();

#ifndef __APPLE__
    CV_OCL_RUN(_flow.isUMat() &&
               ocl::Image2D::isFormatSupported(CV_32F, 1, false),
               calc_ocl(_I0, _I1, _flow))
#endif

    Mat I0 = _I0.getMat();
    Mat I1 = _I1.getMat();

    CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
    CV_Assert( I0.size() == I1.size() );
    CV_Assert( I0.type() == I1.type() );
    CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) );
    CV_Assert( nscales > 0 );
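    // A non-zero gamma enables the optional third channel u3, which models the
    // additional illumination variation term of the extended TV-L1 model
    // (see DualTVL1OpticalFlow::setGamma); the u3/p3x buffers below are only
    // used in that case.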
    bool use_gamma = gamma != 0;
    // allocate memory for the pyramid structure
    dm.I0s.resize(nscales);
    dm.I1s.resize(nscales);
    dm.u1s.resize(nscales);
    dm.u2s.resize(nscales);
    dm.u3s.resize(nscales);

    I0.convertTo(dm.I0s[0], dm.I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0);
    I1.convertTo(dm.I1s[0], dm.I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0);

    dm.u1s[0].create(I0.size());
    dm.u2s[0].create(I0.size());
    if (use_gamma) dm.u3s[0].create(I0.size());

    if (useInitialFlow)
    {
        Mat_<float> mv[] = { dm.u1s[0], dm.u2s[0] };
        split(_flow.getMat(), mv);
    }

    dm.I1x_buf.create(I0.size());
    dm.I1y_buf.create(I0.size());

    dm.flowMap1_buf.create(I0.size());
    dm.flowMap2_buf.create(I0.size());

    dm.I1w_buf.create(I0.size());
    dm.I1wx_buf.create(I0.size());
    dm.I1wy_buf.create(I0.size());

    dm.grad_buf.create(I0.size());
    dm.rho_c_buf.create(I0.size());

    dm.v1_buf.create(I0.size());
    dm.v2_buf.create(I0.size());
    dm.v3_buf.create(I0.size());

    dm.p11_buf.create(I0.size());
    dm.p12_buf.create(I0.size());
    dm.p21_buf.create(I0.size());
    dm.p22_buf.create(I0.size());
    dm.p31_buf.create(I0.size());
    dm.p32_buf.create(I0.size());

    dm.div_p1_buf.create(I0.size());
    dm.div_p2_buf.create(I0.size());
    dm.div_p3_buf.create(I0.size());

    dm.u1x_buf.create(I0.size());
    dm.u1y_buf.create(I0.size());
    dm.u2x_buf.create(I0.size());
    dm.u2y_buf.create(I0.size());
    dm.u3x_buf.create(I0.size());
    dm.u3y_buf.create(I0.size());

    // create the scales
    for (int s = 1; s < nscales; ++s)
    {
        resize(dm.I0s[s - 1], dm.I0s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
        resize(dm.I1s[s - 1], dm.I1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);

        if (dm.I0s[s].cols < 16 || dm.I0s[s].rows < 16)
        {
            nscales = s;
            break;
        }

        if (useInitialFlow)
        {
            resize(dm.u1s[s - 1], dm.u1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
            resize(dm.u2s[s - 1], dm.u2s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);

            multiply(dm.u1s[s], Scalar::all(scaleStep), dm.u1s[s]);
            multiply(dm.u2s[s], Scalar::all(scaleStep), dm.u2s[s]);
        }
        else
        {
            dm.u1s[s].create(dm.I0s[s].size());
            dm.u2s[s].create(dm.I0s[s].size());
        }
        if (use_gamma) dm.u3s[s].create(dm.I0s[s].size());
    }
    if (!useInitialFlow)
    {
        dm.u1s[nscales - 1].setTo(Scalar::all(0));
        dm.u2s[nscales - 1].setTo(Scalar::all(0));
    }
    if (use_gamma) dm.u3s[nscales - 1].setTo(Scalar::all(0));
    // pyramidal structure for computing the optical flow
    for (int s = nscales - 1; s >= 0; --s)
    {
        // compute the optical flow at the current scale
        procOneScale(dm.I0s[s], dm.I1s[s], dm.u1s[s], dm.u2s[s], dm.u3s[s]);

        // if this was the last scale, finish now
        if (s == 0)
            break;

        // otherwise, upsample the optical flow

        // zoom the optical flow for the next finer scale
        resize(dm.u1s[s], dm.u1s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR);
        resize(dm.u2s[s], dm.u2s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR);
        if (use_gamma) resize(dm.u3s[s], dm.u3s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR);

        // scale the optical flow with the appropriate zoom factor (don't scale u3!)
        multiply(dm.u1s[s - 1], Scalar::all(1 / scaleStep), dm.u1s[s - 1]);
        multiply(dm.u2s[s - 1], Scalar::all(1 / scaleStep), dm.u2s[s - 1]);
    }

    Mat uxy[] = { dm.u1s[0], dm.u2s[0] };
    merge(uxy, 2, _flow);
}

#ifdef HAVE_OPENCL
bool OpticalFlowDual_TVL1::calc_ocl(InputArray _I0, InputArray _I1, InputOutputArray _flow)
{
    UMat I0 = _I0.getUMat();
    UMat I1 = _I1.getUMat();

    CV_Assert(I0.type() == CV_8UC1 || I0.type() == CV_32FC1);
    CV_Assert(I0.size() == I1.size());
    CV_Assert(I0.type() == I1.type());
    CV_Assert(!useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2));
    CV_Assert(nscales > 0);

    // allocate memory for the pyramid structure
    dum.I0s.resize(nscales);
    dum.I1s.resize(nscales);
    dum.u1s.resize(nscales);
    dum.u2s.resize(nscales);
    //I0s_step == I1s_step
    double alpha = I0.depth() == CV_8U ? 1.0 : 255.0;

    I0.convertTo(dum.I0s[0], CV_32F, alpha);
    I1.convertTo(dum.I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);

    dum.u1s[0].create(I0.size(), CV_32FC1);
    dum.u2s[0].create(I0.size(), CV_32FC1);

    if (useInitialFlow)
    {
        std::vector<UMat> umv;
        umv.push_back(dum.u1s[0]);
        umv.push_back(dum.u2s[0]);
        cv::split(_flow,umv);
    }

    dum.I1x_buf.create(I0.size(), CV_32FC1);
    dum.I1y_buf.create(I0.size(), CV_32FC1);

    dum.I1w_buf.create(I0.size(), CV_32FC1);
    dum.I1wx_buf.create(I0.size(), CV_32FC1);
    dum.I1wy_buf.create(I0.size(), CV_32FC1);

    dum.grad_buf.create(I0.size(), CV_32FC1);
    dum.rho_c_buf.create(I0.size(), CV_32FC1);

    dum.p11_buf.create(I0.size(), CV_32FC1);
    dum.p12_buf.create(I0.size(), CV_32FC1);
    dum.p21_buf.create(I0.size(), CV_32FC1);
    dum.p22_buf.create(I0.size(), CV_32FC1);

    dum.diff_buf.create(I0.size(), CV_32FC1);

    // create the scales
    for (int s = 1; s < nscales; ++s)
    {
        resize(dum.I0s[s - 1], dum.I0s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
        resize(dum.I1s[s - 1], dum.I1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);

        if (dum.I0s[s].cols < 16 || dum.I0s[s].rows < 16)
        {
            nscales = s;
            break;
        }

        if (useInitialFlow)
        {
            resize(dum.u1s[s - 1], dum.u1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);
            resize(dum.u2s[s - 1], dum.u2s[s], Size(), scaleStep, scaleStep, INTER_LINEAR);

            //scale by scale factor
            multiply(dum.u1s[s], Scalar::all(scaleStep), dum.u1s[s]);
            multiply(dum.u2s[s], Scalar::all(scaleStep), dum.u2s[s]);
        }
    }

    // pyramidal structure for computing the optical flow
    for (int s = nscales - 1; s >= 0; --s)
    {
        // compute the optical flow at the current scale
        if (!OpticalFlowDual_TVL1::procOneScale_ocl(dum.I0s[s], dum.I1s[s], dum.u1s[s], dum.u2s[s]))
            return false;

        // if this was the last scale, finish now
        if (s == 0)
            break;

        // zoom the optical flow for the next finer scale
        resize(dum.u1s[s], dum.u1s[s - 1], dum.I0s[s - 1].size(), 0, 0, INTER_LINEAR);
        resize(dum.u2s[s], dum.u2s[s - 1], dum.I0s[s - 1].size(), 0, 0, INTER_LINEAR);

        // scale the optical flow with the appropriate zoom factor
        multiply(dum.u1s[s - 1], Scalar::all(1 / scaleStep), dum.u1s[s - 1]);
        multiply(dum.u2s[s - 1], Scalar::all(1 / scaleStep), dum.u2s[s - 1]);
    }

    std::vector<UMat> uxy;
    uxy.push_back(dum.u1s[0]);
    uxy.push_back(dum.u2s[0]);
    merge(uxy, _flow);
    return true;
}
#endif

////////////////////////////////////////////////////////////
// buildFlowMap

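// Converts the flow field (u1, u2) into absolute sampling maps,
// map1(y, x) = x + u1(y, x) and map2(y, x) = y + u2(y, x),
// which are later fed to cv::remap to warp I1 (and its gradient) towards I0.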
struct BuildFlowMapBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> u1;
    Mat_<float> u2;
    mutable Mat_<float> map1;
    mutable Mat_<float> map2;
};

void BuildFlowMapBody::operator() (const Range& range) const
{
    for (int y = range.start; y < range.end; ++y)
    {
        const float* u1Row = u1[y];
        const float* u2Row = u2[y];

        float* map1Row = map1[y];
        float* map2Row = map2[y];

        for (int x = 0; x < u1.cols; ++x)
        {
            map1Row[x] = x + u1Row[x];
            map2Row[x] = y + u2Row[x];
        }
    }
}

static void buildFlowMap(const Mat_<float>& u1, const Mat_<float>& u2,
                         Mat_<float>& map1, Mat_<float>& map2)
{
    CV_DbgAssert( u2.size() == u1.size() );
    CV_DbgAssert( map1.size() == u1.size() );
    CV_DbgAssert( map2.size() == u1.size() );

    BuildFlowMapBody body;

    body.u1 = u1;
    body.u2 = u2;
    body.map1 = map1;
    body.map2 = map2;

    parallel_for_(Range(0, u1.rows), body);
}

////////////////////////////////////////////////////////////
// centeredGradient

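// Central-difference gradient, 0.5 * (f(i + 1) - f(i - 1)), over the image
// interior; at the borders the difference in the direction that would leave
// the image falls back to a one-sided difference (still scaled by 0.5).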
struct CenteredGradientBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> src;
    mutable Mat_<float> dx;
    mutable Mat_<float> dy;
};

void CenteredGradientBody::operator() (const Range& range) const
{
    const int last_col = src.cols - 1;

    for (int y = range.start; y < range.end; ++y)
    {
        const float* srcPrevRow = src[y - 1];
        const float* srcCurRow = src[y];
        const float* srcNextRow = src[y + 1];

        float* dxRow = dx[y];
        float* dyRow = dy[y];

        for (int x = 1; x < last_col; ++x)
        {
            dxRow[x] = 0.5f * (srcCurRow[x + 1] - srcCurRow[x - 1]);
            dyRow[x] = 0.5f * (srcNextRow[x] - srcPrevRow[x]);
        }
    }
}

static void centeredGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
{
    CV_DbgAssert( src.rows > 2 && src.cols > 2 );
    CV_DbgAssert( dx.size() == src.size() );
    CV_DbgAssert( dy.size() == src.size() );

    const int last_row = src.rows - 1;
    const int last_col = src.cols - 1;

    // compute the gradient on the center body of the image
    {
        CenteredGradientBody body;

        body.src = src;
        body.dx = dx;
        body.dy = dy;

        parallel_for_(Range(1, last_row), body);
    }

    // compute the gradient on the first and last rows
    for (int x = 1; x < last_col; ++x)
    {
        dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1));
        dy(0, x) = 0.5f * (src(1, x) - src(0, x));

        dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1));
        dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x));
    }

    // compute the gradient on the first and last columns
    for (int y = 1; y < last_row; ++y)
    {
        dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0));
        dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0));

        dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1));
        dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col));
    }

    // compute the gradient at the four corners
    dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0));
    dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0));

    dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1));
    dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col));

    dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0));
    dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0));

    dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1));
    dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col));
}

////////////////////////////////////////////////////////////
// forwardGradient

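// Forward-difference gradient, dx = f(y, x + 1) - f(y, x) and
// dy = f(y + 1, x) - f(y, x), with the differences across the last row/column
// set to zero (Neumann boundary). Applied to the flow channels u1, u2 (and u3).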
struct ForwardGradientBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> src;
    mutable Mat_<float> dx;
    mutable Mat_<float> dy;
};

void ForwardGradientBody::operator() (const Range& range) const
{
    const int last_col = src.cols - 1;

    for (int y = range.start; y < range.end; ++y)
    {
        const float* srcCurRow = src[y];
        const float* srcNextRow = src[y + 1];

        float* dxRow = dx[y];
        float* dyRow = dy[y];

        for (int x = 0; x < last_col; ++x)
        {
            dxRow[x] = srcCurRow[x + 1] - srcCurRow[x];
            dyRow[x] = srcNextRow[x] - srcCurRow[x];
        }
    }
}

static void forwardGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
{
    CV_DbgAssert( src.rows > 2 && src.cols > 2 );
    CV_DbgAssert( dx.size() == src.size() );
    CV_DbgAssert( dy.size() == src.size() );

    const int last_row = src.rows - 1;
    const int last_col = src.cols - 1;

    // compute the gradient on the central body of the image
    {
        ForwardGradientBody body;

        body.src = src;
        body.dx = dx;
        body.dy = dy;

        parallel_for_(Range(0, last_row), body);
    }

    // compute the gradient on the last row
    for (int x = 0; x < last_col; ++x)
    {
        dx(last_row, x) = src(last_row, x + 1) - src(last_row, x);
        dy(last_row, x) = 0.0f;
    }

    // compute the gradient on the last column
    for (int y = 0; y < last_row; ++y)
    {
        dx(y, last_col) = 0.0f;
        dy(y, last_col) = src(y + 1, last_col) - src(y, last_col);
    }

    dx(last_row, last_col) = 0.0f;
    dy(last_row, last_col) = 0.0f;
}

////////////////////////////////////////////////////////////
// divergence

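// Backward-difference divergence of a dual field (v1, v2),
// div(y, x) = v1(y, x) - v1(y, x - 1) + v2(y, x) - v2(y - 1, x),
// the negative adjoint of the forward gradient above; on the first row and
// column only the terms that stay inside the image are kept.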
struct DivergenceBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> v1;
    Mat_<float> v2;
    mutable Mat_<float> div;
};

void DivergenceBody::operator() (const Range& range) const
{
    for (int y = range.start; y < range.end; ++y)
    {
        const float* v1Row = v1[y];
        const float* v2PrevRow = v2[y - 1];
        const float* v2CurRow = v2[y];

        float* divRow = div[y];

        for(int x = 1; x < v1.cols; ++x)
        {
            const float v1x = v1Row[x] - v1Row[x - 1];
            const float v2y = v2CurRow[x] - v2PrevRow[x];

            divRow[x] = v1x + v2y;
        }
    }
}

static void divergence(const Mat_<float>& v1, const Mat_<float>& v2, Mat_<float>& div)
{
    CV_DbgAssert( v1.rows > 2 && v1.cols > 2 );
    CV_DbgAssert( v2.size() == v1.size() );
    CV_DbgAssert( div.size() == v1.size() );

    {
        DivergenceBody body;

        body.v1 = v1;
        body.v2 = v2;
        body.div = div;

        parallel_for_(Range(1, v1.rows), body);
    }

    // compute the divergence on the first row
    for(int x = 1; x < v1.cols; ++x)
        div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x);

    // compute the divergence on the first column
    for (int y = 1; y < v1.rows; ++y)
        div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0);

    div(0, 0) = v1(0, 0) + v2(0, 0);
}

////////////////////////////////////////////////////////////
// calcGradRho

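// Precomputes, for the flow u0 = (u1, u2) used in the current warp:
//   grad  = |grad I1(x + u0)|^2                          (squared gradient magnitude)
//   rho_c = I1(x + u0) - grad I1(x + u0) . u0 - I0(x)    (constant part of the
//           linearized residual rho(u) = I1(x + u0) + grad I1(x + u0) . (u - u0) - I0(x))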
struct CalcGradRhoBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> I0;
    Mat_<float> I1w;
    Mat_<float> I1wx;
    Mat_<float> I1wy;
    Mat_<float> u1;
    Mat_<float> u2;
    mutable Mat_<float> grad;
    mutable Mat_<float> rho_c;
};

void CalcGradRhoBody::operator() (const Range& range) const
{
    for (int y = range.start; y < range.end; ++y)
    {
        const float* I0Row = I0[y];
        const float* I1wRow = I1w[y];
        const float* I1wxRow = I1wx[y];
        const float* I1wyRow = I1wy[y];
        const float* u1Row = u1[y];
        const float* u2Row = u2[y];

        float* gradRow = grad[y];
        float* rhoRow = rho_c[y];

        for (int x = 0; x < I0.cols; ++x)
        {
            const float Ix2 = I1wxRow[x] * I1wxRow[x];
            const float Iy2 = I1wyRow[x] * I1wyRow[x];

            // store the |Grad(I1)|^2
            gradRow[x] = Ix2 + Iy2;

            // compute the constant part of the rho function
            rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]);
        }
    }
}

static void calcGradRho(const Mat_<float>& I0, const Mat_<float>& I1w, const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2,
    Mat_<float>& grad, Mat_<float>& rho_c)
{
    CV_DbgAssert( I1w.size() == I0.size() );
    CV_DbgAssert( I1wx.size() == I0.size() );
    CV_DbgAssert( I1wy.size() == I0.size() );
    CV_DbgAssert( u1.size() == I0.size() );
    CV_DbgAssert( u2.size() == I0.size() );
    CV_DbgAssert( grad.size() == I0.size() );
    CV_DbgAssert( rho_c.size() == I0.size() );

    CalcGradRhoBody body;

    body.I0 = I0;
    body.I1w = I1w;
    body.I1wx = I1wx;
    body.I1wy = I1wy;
    body.u1 = u1;
    body.u2 = u2;
    body.grad = grad;
    body.rho_c = rho_c;

    parallel_for_(Range(0, I0.rows), body);
}

////////////////////////////////////////////////////////////
// estimateV

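// Pointwise thresholding step (TH) of the TV-L1 scheme: v = u + d with
//   d = l_t * grad I1w                        if rho(u) < -l_t * |grad I1w|^2
//   d = -l_t * grad I1w                       if rho(u) >  l_t * |grad I1w|^2
//   d = -rho(u) * grad I1w / |grad I1w|^2     otherwise (non-zero gradient),
// where l_t = lambda * theta. When gamma != 0 the same update is applied to the
// illumination channel u3, with gamma playing the role of the image derivative.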
struct EstimateVBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> I1wx;
    Mat_<float> I1wy;
    Mat_<float> u1;
    Mat_<float> u2;
    Mat_<float> u3;
    Mat_<float> grad;
    Mat_<float> rho_c;
    mutable Mat_<float> v1;
    mutable Mat_<float> v2;
    mutable Mat_<float> v3;
    float l_t;
    float gamma;
};

void EstimateVBody::operator() (const Range& range) const
{
    bool use_gamma = gamma != 0;
    for (int y = range.start; y < range.end; ++y)
    {
        const float* I1wxRow = I1wx[y];
        const float* I1wyRow = I1wy[y];
        const float* u1Row = u1[y];
        const float* u2Row = u2[y];
        const float* u3Row = use_gamma?u3[y]:NULL;
        const float* gradRow = grad[y];
        const float* rhoRow = rho_c[y];

        float* v1Row = v1[y];
        float* v2Row = v2[y];
        float* v3Row = use_gamma ? v3[y]:NULL;

        for (int x = 0; x < I1wx.cols; ++x)
        {
            const float rho = use_gamma ? rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]) + gamma * u3Row[x] :
                                          rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]);
            float d1 = 0.0f;
            float d2 = 0.0f;
            float d3 = 0.0f;
            if (rho < -l_t * gradRow[x])
            {
                d1 = l_t * I1wxRow[x];
                d2 = l_t * I1wyRow[x];
                if (use_gamma) d3 = l_t * gamma;
            }
            else if (rho > l_t * gradRow[x])
            {
                d1 = -l_t * I1wxRow[x];
                d2 = -l_t * I1wyRow[x];
                if (use_gamma) d3 = -l_t * gamma;
            }
            else if (gradRow[x] > std::numeric_limits<float>::epsilon())
            {
                float fi = -rho / gradRow[x];
                d1 = fi * I1wxRow[x];
                d2 = fi * I1wyRow[x];
                if (use_gamma) d3 = fi * gamma;
            }

            v1Row[x] = u1Row[x] + d1;
            v2Row[x] = u2Row[x] + d2;
            if (use_gamma) v3Row[x] = u3Row[x] + d3;
        }
    }
}

static void estimateV(const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2, const Mat_<float>& u3, const Mat_<float>& grad, const Mat_<float>& rho_c,
   Mat_<float>& v1, Mat_<float>& v2, Mat_<float>& v3, float l_t, float gamma)
{
    CV_DbgAssert( I1wy.size() == I1wx.size() );
    CV_DbgAssert( u1.size() == I1wx.size() );
    CV_DbgAssert( u2.size() == I1wx.size() );
    CV_DbgAssert( grad.size() == I1wx.size() );
    CV_DbgAssert( rho_c.size() == I1wx.size() );
    CV_DbgAssert( v1.size() == I1wx.size() );
    CV_DbgAssert( v2.size() == I1wx.size() );

    EstimateVBody body;
    bool use_gamma = gamma != 0;
    body.I1wx = I1wx;
    body.I1wy = I1wy;
    body.u1 = u1;
    body.u2 = u2;
    if (use_gamma) body.u3 = u3;
    body.grad = grad;
    body.rho_c = rho_c;
    body.v1 = v1;
    body.v2 = v2;
    if (use_gamma) body.v3 = v3;
    body.l_t = l_t;
    body.gamma = gamma;
    parallel_for_(Range(0, I1wx.rows), body);
}

////////////////////////////////////////////////////////////
// estimateU

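// Pointwise flow update from the thresholded field v and the dual variables p:
// u = v + theta * div(p), one channel at a time. Returns the sum of squared
// changes of u over the image, which the caller compares against
// epsilon^2 * area as the stopping criterion.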
static float estimateU(const Mat_<float>& v1, const Mat_<float>& v2, const Mat_<float>& v3,
            const Mat_<float>& div_p1, const Mat_<float>& div_p2, const Mat_<float>& div_p3,
            Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3,
            float theta, float gamma)
{
    CV_DbgAssert( v2.size() == v1.size() );
    CV_DbgAssert( div_p1.size() == v1.size() );
    CV_DbgAssert( div_p2.size() == v1.size() );
    CV_DbgAssert( u1.size() == v1.size() );
    CV_DbgAssert( u2.size() == v1.size() );

    float error = 0.0f;
    bool use_gamma = gamma != 0;
    for (int y = 0; y < v1.rows; ++y)
    {
        const float* v1Row = v1[y];
        const float* v2Row = v2[y];
        const float* v3Row = use_gamma?v3[y]:NULL;
        const float* divP1Row = div_p1[y];
        const float* divP2Row = div_p2[y];
        const float* divP3Row = use_gamma?div_p3[y]:NULL;

        float* u1Row = u1[y];
        float* u2Row = u2[y];
        float* u3Row = use_gamma?u3[y]:NULL;


        for (int x = 0; x < v1.cols; ++x)
        {
            const float u1k = u1Row[x];
            const float u2k = u2Row[x];
            const float u3k = use_gamma?u3Row[x]:0;

            u1Row[x] = v1Row[x] + theta * divP1Row[x];
            u2Row[x] = v2Row[x] + theta * divP2Row[x];
            if (use_gamma) u3Row[x] = v3Row[x] + theta * divP3Row[x];
            error += use_gamma?(u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k) + (u3Row[x] - u3k) * (u3Row[x] - u3k):
                               (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k);
        }
    }

    return error;
}

////////////////////////////////////////////////////////////
// estimateDualVariables

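// Dual variable update with pointwise renormalization,
//   p <- (p + taut * grad u) / (1 + taut * |grad u|),   taut = tau / theta,
// applied independently to the dual pairs of u1, u2 and, when use_gamma, u3.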
struct EstimateDualVariablesBody : ParallelLoopBody
{
    void operator() (const Range& range) const CV_OVERRIDE;

    Mat_<float> u1x;
    Mat_<float> u1y;
    Mat_<float> u2x;
    Mat_<float> u2y;
    Mat_<float> u3x;
    Mat_<float> u3y;
    mutable Mat_<float> p11;
    mutable Mat_<float> p12;
    mutable Mat_<float> p21;
    mutable Mat_<float> p22;
    mutable Mat_<float> p31;
    mutable Mat_<float> p32;
    float taut;
    bool use_gamma;
};

void EstimateDualVariablesBody::operator() (const Range& range) const
{
    for (int y = range.start; y < range.end; ++y)
    {
        const float* u1xRow = u1x[y];
        const float* u1yRow = u1y[y];
        const float* u2xRow = u2x[y];
        const float* u2yRow = u2y[y];
        const float* u3xRow = u3x[y];
        const float* u3yRow = u3y[y];

        float* p11Row = p11[y];
        float* p12Row = p12[y];
        float* p21Row = p21[y];
        float* p22Row = p22[y];
        float* p31Row = p31[y];
        float* p32Row = p32[y];

        for (int x = 0; x < u1x.cols; ++x)
        {
            const float g1 = static_cast<float>(hypot(u1xRow[x], u1yRow[x]));
            const float g2 = static_cast<float>(hypot(u2xRow[x], u2yRow[x]));

            const float ng1  = 1.0f + taut * g1;
            const float ng2 =  1.0f + taut * g2;

            p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1;
            p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1;
            p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2;
            p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2;

            if (use_gamma)
            {
                const float g3 = static_cast<float>(hypot(u3xRow[x], u3yRow[x]));
                const float ng3 = 1.0f + taut * g3;
                p31Row[x] = (p31Row[x] + taut * u3xRow[x]) / ng3;
                p32Row[x] = (p32Row[x] + taut * u3yRow[x]) / ng3;
            }
        }
    }
}

static void estimateDualVariables(const Mat_<float>& u1x, const Mat_<float>& u1y,
                     const Mat_<float>& u2x, const Mat_<float>& u2y,
                     const Mat_<float>& u3x, const Mat_<float>& u3y,
                           Mat_<float>& p11, Mat_<float>& p12,
                     Mat_<float>& p21, Mat_<float>& p22,
                     Mat_<float>& p31, Mat_<float>& p32,
                     float taut, bool use_gamma)
{
    CV_DbgAssert( u1y.size() == u1x.size() );
    CV_DbgAssert( u2x.size() == u1x.size() );
    CV_DbgAssert( u3x.size() == u1x.size() );
    CV_DbgAssert( u2y.size() == u1x.size() );
    CV_DbgAssert( u3y.size() == u1x.size() );
    CV_DbgAssert( p11.size() == u1x.size() );
    CV_DbgAssert( p12.size() == u1x.size() );
    CV_DbgAssert( p21.size() == u1x.size() );
    CV_DbgAssert( p22.size() == u1x.size() );
    CV_DbgAssert( p31.size() == u1x.size() );
    CV_DbgAssert( p32.size() == u1x.size() );

    EstimateDualVariablesBody body;

    body.u1x = u1x;
    body.u1y = u1y;
    body.u2x = u2x;
    body.u2y = u2y;
    body.u3x = u3x;
    body.u3y = u3y;
    body.p11 = p11;
    body.p12 = p12;
    body.p21 = p21;
    body.p22 = p22;
    body.p31 = p31;
    body.p32 = p32;
    body.taut = taut;
    body.use_gamma = use_gamma;

    parallel_for_(Range(0, u1x.rows), body);
}

#ifdef HAVE_OPENCL
bool OpticalFlowDual_TVL1::procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2)
{
    using namespace cv_ocl_tvl1flow;

    const double scaledEpsilon = epsilon * epsilon * I0.size().area();

    CV_DbgAssert(I1.size() == I0.size());
    CV_DbgAssert(I1.type() == I0.type());
    CV_DbgAssert(u1.empty() || u1.size() == I0.size());
    CV_DbgAssert(u2.size() == u1.size());

    if (u1.empty())
    {
        u1.create(I0.size(), CV_32FC1);
        u1.setTo(Scalar::all(0));

        u2.create(I0.size(), CV_32FC1);
        u2.setTo(Scalar::all(0));
    }

    UMat I1x = dum.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat I1y = dum.I1y_buf(Rect(0, 0, I0.cols, I0.rows));

    if (!centeredGradient(I1, I1x, I1y))
        return false;

    UMat I1w = dum.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat I1wx = dum.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat I1wy = dum.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));

    UMat grad = dum.grad_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat rho_c = dum.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));

    UMat p11 = dum.p11_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p12 = dum.p12_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p21 = dum.p21_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p22 = dum.p22_buf(Rect(0, 0, I0.cols, I0.rows));
    p11.setTo(Scalar::all(0));
    p12.setTo(Scalar::all(0));
    p21.setTo(Scalar::all(0));
    p22.setTo(Scalar::all(0));

    UMat diff = dum.diff_buf(Rect(0, 0, I0.cols, I0.rows));

    const float l_t = static_cast<float>(lambda * theta);
    const float taut = static_cast<float>(tau / theta);
    int n;

    for (int warpings = 0; warpings < warps; ++warpings)
    {
        if (!warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c))
            return false;

        double error = std::numeric_limits<double>::max();
        double prev_error = 0;

        for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
        {
            if (medianFiltering > 1) {
                cv::medianBlur(u1, u1, medianFiltering);
                cv::medianBlur(u2, u2, medianFiltering);
            }
            for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
            {
                // tweak: evaluate the error sum only on selected iterations, so the
                // costly reduction runs less frequently
                n = n_inner + n_outer*innerIterations;
                char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
                if (!estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
                    u1, u2, diff, l_t, static_cast<float>(theta), calc_error))
                    return false;
                if (calc_error)
                {
                    error = cv::sum(diff)[0];
                    prev_error = error;
                }
                else
                {
                    error = std::numeric_limits<double>::max();
                    prev_error -= scaledEpsilon;
                }
                if (!estimateDualVariables(u1, u2, p11, p12, p21, p22, taut))
                    return false;
            }
        }
    }
    return true;
}
#endif

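// Single-scale TV-L1 solver (CPU path). For every warp, I1 and its gradient are
// warped by the current flow estimate; the inner loop then alternates between
// the pointwise thresholding step (estimateV), the flow update from the dual
// variables (estimateU) and the dual variable update (estimateDualVariables)
// until the flow change drops below epsilon^2 * area or the iteration limits
// are reached.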
void OpticalFlowDual_TVL1::procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3)
{
    const float scaledEpsilon = static_cast<float>(epsilon * epsilon * I0.size().area());

    CV_DbgAssert( I1.size() == I0.size() );
    CV_DbgAssert( I1.type() == I0.type() );
    CV_DbgAssert( u1.size() == I0.size() );
    CV_DbgAssert( u2.size() == u1.size() );

    Mat_<float> I1x = dm.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1y = dm.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
    centeredGradient(I1, I1x, I1y);

    Mat_<float> flowMap1 = dm.flowMap1_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> flowMap2 = dm.flowMap2_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> I1w = dm.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1wx = dm.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1wy = dm.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> grad = dm.grad_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> rho_c = dm.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> v1 = dm.v1_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> v2 = dm.v2_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> v3 = dm.v3_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> p11 = dm.p11_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p12 = dm.p12_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p21 = dm.p21_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p22 = dm.p22_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p31 = dm.p31_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p32 = dm.p32_buf(Rect(0, 0, I0.cols, I0.rows));
    p11.setTo(Scalar::all(0));
    p12.setTo(Scalar::all(0));
    p21.setTo(Scalar::all(0));
    p22.setTo(Scalar::all(0));
    bool use_gamma = gamma != 0.;
    if (use_gamma) p31.setTo(Scalar::all(0));
    if (use_gamma) p32.setTo(Scalar::all(0));

    Mat_<float> div_p1 = dm.div_p1_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> div_p2 = dm.div_p2_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> div_p3 = dm.div_p3_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> u1x = dm.u1x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u1y = dm.u1y_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u2x = dm.u2x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u2y = dm.u2y_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u3x = dm.u3x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u3y = dm.u3y_buf(Rect(0, 0, I0.cols, I0.rows));

    const float l_t = static_cast<float>(lambda * theta);
    const float taut = static_cast<float>(tau / theta);

    for (int warpings = 0; warpings < warps; ++warpings)
    {
        // compute the warping of the target image and its derivatives
        buildFlowMap(u1, u2, flowMap1, flowMap2);
        remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC);
        remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC);
        remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC);
        //calculate I1(x+u0) and its gradient
        calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c);

        float error = std::numeric_limits<float>::max();
        for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
        {
            if (medianFiltering > 1) {
                cv::medianBlur(u1, u1, medianFiltering);
                cv::medianBlur(u2, u2, medianFiltering);
            }
            for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
            {
                // estimate the values of the variable (v1, v2) (thresholding operator TH)
                estimateV(I1wx, I1wy, u1, u2, u3, grad, rho_c, v1, v2, v3, l_t, static_cast<float>(gamma));

                // compute the divergence of the dual variable (p1, p2, p3)
                divergence(p11, p12, div_p1);
                divergence(p21, p22, div_p2);
                if (use_gamma) divergence(p31, p32, div_p3);

                // estimate the values of the optical flow (u1, u2)
                error = estimateU(v1, v2, v3, div_p1, div_p2, div_p3, u1, u2, u3, static_cast<float>(theta), static_cast<float>(gamma));

                // compute the gradient of the optical flow (Du1, Du2)
                forwardGradient(u1, u1x, u1y);
                forwardGradient(u2, u2x, u2y);
                if (use_gamma) forwardGradient(u3, u3x, u3y);

                // estimate the values of the dual variable (p1, p2, p3)
                estimateDualVariables(u1x, u1y, u2x, u2y, u3x, u3y, p11, p12, p21, p22, p31, p32, taut, use_gamma);
            }
        }
    }
}

void OpticalFlowDual_TVL1::collectGarbage()
{
    //dataMat structure dm
    dm.I0s.clear();
    dm.I1s.clear();
    dm.u1s.clear();
    dm.u2s.clear();

    dm.I1x_buf.release();
    dm.I1y_buf.release();

    dm.flowMap1_buf.release();
    dm.flowMap2_buf.release();

    dm.I1w_buf.release();
    dm.I1wx_buf.release();
    dm.I1wy_buf.release();

    dm.grad_buf.release();
    dm.rho_c_buf.release();

    dm.v1_buf.release();
    dm.v2_buf.release();

    dm.p11_buf.release();
    dm.p12_buf.release();
    dm.p21_buf.release();
    dm.p22_buf.release();

    dm.div_p1_buf.release();
    dm.div_p2_buf.release();

    dm.u1x_buf.release();
    dm.u1y_buf.release();
    dm.u2x_buf.release();
    dm.u2y_buf.release();

#ifdef HAVE_OPENCL
    //dataUMat structure dum
    dum.I0s.clear();
    dum.I1s.clear();
    dum.u1s.clear();
    dum.u2s.clear();

    dum.I1x_buf.release();
    dum.I1y_buf.release();

    dum.I1w_buf.release();
    dum.I1wx_buf.release();
    dum.I1wy_buf.release();

    dum.grad_buf.release();
    dum.rho_c_buf.release();

    dum.p11_buf.release();
    dum.p12_buf.release();
    dum.p21_buf.release();
    dum.p22_buf.release();

    dum.diff_buf.release();
    dum.norm_buf.release();
#endif
}

Ptr<DualTVL1OpticalFlow> createOptFlow_DualTVL1()
{
    return makePtr<OpticalFlowDual_TVL1>();
}

Ptr<DualTVL1OpticalFlow> DualTVL1OpticalFlow::create(
    double tau, double lambda, double theta, int nscales, int warps,
    double epsilon, int innerIterations, int outerIterations, double scaleStep,
    double gamma, int medianFilter, bool useInitialFlow)
{
    return makePtr<OpticalFlowDual_TVL1>(tau, lambda, theta, nscales, warps,
                                         epsilon, innerIterations, outerIterations,
                                         scaleStep, gamma, medianFilter, useInitialFlow);
}
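
// Minimal usage sketch (illustrative only; prevGray/nextGray are placeholder
// names for two consecutive grayscale frames):
//
//     Ptr<DualTVL1OpticalFlow> tvl1 = createOptFlow_DualTVL1();
//     Mat flow;                              // filled with CV_32FC2 flow vectors
//     tvl1->calc(prevGray, nextGray, flow);  // inputs must be CV_8UC1 or CV_32FC1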

}
}