1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                           License Agreement
11//                For Open Source Computer Vision Library
12//
13// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15// Third party copyrights are property of their respective owners.
16//
17// @Authors
18//    Wu Zailong, bullet@yeah.net
19//
20// Redistribution and use in source and binary forms, with or without modification,
21// are permitted provided that the following conditions are met:
22//
23//   * Redistribution's of source code must retain the above copyright notice,
24//     this list of conditions and the following disclaimer.
25//
26//   * Redistribution's in binary form must reproduce the above copyright notice,
27//     this list of conditions and the following disclaimer in the documentation
28//     and/or other materials provided with the distribution.
29//
30//   * The name of the copyright holders may not be used to endorse or promote products
31//     derived from this software without specific prior written permission.
32//
33// This software is provided by the copyright holders and contributors as is and
34// any express or implied warranties, including, but not limited to, the implied
35// warranties of merchantability and fitness for a particular purpose are disclaimed.
36// In no event shall the Intel Corporation or contributors be liable for any direct,
37// indirect, incidental, special, exemplary, or consequential damages
38// (including, but not limited to, procurement of substitute goods or services;
39// loss of use, data, or profits; or business interruption) however caused
40// and on any theory of liability, whether in contract, strict liability,
41// or tort (including negligence or otherwise) arising in any way out of
42// the use of this software, even if advised of the possibility of such damage.
43//
44//M*/
45
// When DOUBLE_SUPPORT is defined, enable whichever fp64 extension the
// compiler advertises; the AMD extension is tried before the Khronos one.
#if defined(DOUBLE_SUPPORT)
#  if defined(cl_amd_fp64)
#    pragma OPENCL EXTENSION cl_amd_fp64:enable
#  elif defined(cl_khr_fp64)
#    pragma OPENCL EXTENSION cl_khr_fp64:enable
#  endif
#endif
53
// Empty macro: `noconvert(x)` expands to `(x)`.
// NOTE(review): presumably supplied by the host as the value of one of the
// convert* build options when no conversion is needed - confirm.
#define noconvert

// Pixel access helpers.
//   loadpix(addr)       - read one pixel from a byte address
//   storepix(val, addr) - write one pixel to a byte address
//   TSIZE               - size of one pixel in bytes
//   convertScalar(a)    - adapt the border-value kernel argument to pixel type T
//
// Three-channel pixels go through vload3/vstore3 on the element type T1 and
// use 3 * sizeof(T1) bytes; every other channel count reads/writes T directly.
//
// Every expansion and every argument is parenthesized so the helpers behave
// like ordinary expressions inside larger ones (without the parentheses,
// `storepix(v, p) == v` would parse as `*p = (v == v)`).
#if cn != 3
#define loadpix(addr) (*(__global const T *)(addr))
#define storepix(val, addr) (*(__global T *)(addr) = (val))
#define TSIZE ((int)sizeof(T))
#define convertScalar(a) (a)
#else
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
#define storepix(val, addr) vstore3((val), 0, (__global T1 *)(addr))
#define TSIZE ((int)sizeof(T1) * 3)
#define convertScalar(a) ((T)((a).x, (a).y, (a).z))
#endif
67
// Fixed-point layout of the fractional part of a source coordinate:
// INTER_BITS bits per axis, i.e. INTER_TAB_SIZE sub-pixel steps per axis and
// INTER_TAB_SIZE2 combined (x, y) steps.
enum
{
    INTER_BITS      = 5,
    INTER_TAB_SIZE  = (1 << INTER_BITS),
    INTER_TAB_SIZE2 = (INTER_TAB_SIZE * INTER_TAB_SIZE)
};
74
// The nearest-neighbour kernels keep samples in pixel type T, so the
// widening conversion is defined away: `convertToWT(x)` expands to `(x)`.
#if defined(INTER_NEAREST)
#define convertToWT
#endif
78
// EXTRAPOLATE(v2, v): produce the sample for a coordinate that lies outside
// the source image, according to the border mode selected at build time.
//
//   v2 - int2 (x, y) source coordinate. Every mode except BORDER_CONSTANT
//        rewrites it IN PLACE to the in-image coordinate it reads from;
//        BORDER_CONSTANT never touches it.
//   v  - variable that receives the sample.
//
// The expansion site must provide srcptr, src_step, src_offset, src_rows and
// src_cols (and `scalar` for BORDER_CONSTANT). The BORDER_CONSTANT form ends
// with its own semicolon and the other forms are brace blocks, so call sites
// compile with or without a trailing ';'.
#if defined(BORDER_CONSTANT)

#define EXTRAPOLATE(v2, v) v = scalar;

#elif defined(BORDER_REPLICATE)

/* Clamp the coordinate onto the nearest edge pixel. */
#define EXTRAPOLATE(v2, v) \
    { \
        v2 = max(min(v2, (int2)(src_cols - 1, src_rows - 1)), (int2)(0)); \
        v = convertToWT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \
    }

#elif defined(BORDER_WRAP)

/* Wrap the coordinate around the image periodically, each axis on its own. */
#define EXTRAPOLATE(v2, v) \
    { \
        if (v2.x < 0) \
        { \
            v2.x -= ((v2.x - src_cols + 1) / src_cols) * src_cols; \
        } \
        if (v2.x >= src_cols) \
        { \
            v2.x %= src_cols; \
        } \
        \
        if (v2.y < 0) \
        { \
            v2.y -= ((v2.y - src_rows + 1) / src_rows) * src_rows; \
        } \
        if (v2.y >= src_rows) \
        { \
            v2.y %= src_rows; \
        } \
        v = convertToWT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \
    }

#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)

/* delta is 0 for BORDER_REFLECT (edge pixel repeated) and 1 for
   BORDER_REFLECT_101 (edge pixel not repeated). */
#if defined(BORDER_REFLECT)
#define DELTA int delta = 0
#else
#define DELTA int delta = 1
#endif

/* Mirror the coordinate about the image edges until it falls inside; an
   axis of length 1 maps straight to index 0. */
#define EXTRAPOLATE(v2, v) \
    { \
        DELTA; \
        if (src_cols == 1) \
        { \
            v2.x = 0; \
        } \
        else \
        { \
            do \
            { \
                v2.x = (v2.x < 0) ? (-v2.x - 1 + delta) \
                                  : (src_cols - 1 - (v2.x - src_cols) - delta); \
            } \
            while (v2.x < 0 || v2.x >= src_cols); \
        } \
        \
        if (src_rows == 1) \
        { \
            v2.y = 0; \
        } \
        else \
        { \
            do \
            { \
                v2.y = (v2.y < 0) ? (-v2.y - 1 + delta) \
                                  : (src_rows - 1 - (v2.y - src_rows) - delta); \
            } \
            while (v2.y < 0 || v2.y >= src_rows); \
        } \
        v = convertToWT(loadpix((__global const T*)(srcptr + mad24(v2.y, src_step, v2.x * TSIZE + src_offset)))); \
    }

#else
#error No extrapolation method
#endif
138
// True when the integer source coordinate (gx, gy) lies outside the
// src_cols x src_rows image visible at the expansion site. Arguments are
// parenthesized so that compound expressions (e.g. `a | b`) compare as a
// whole. Each argument is evaluated twice, so pass side-effect-free values.
#define NEED_EXTRAPOLATION(gx, gy) ((gx) >= src_cols || (gy) >= src_rows || (gx) < 0 || (gy) < 0)
140
141#ifdef INTER_NEAREST
142
// Nearest-neighbour remap with the x and y source coordinates stored in two
// separate single-channel float maps.
//
// Each work-item handles column `x` of up to `rowsPerWI` consecutive
// destination rows. The float coordinates are rounded to the nearest integer
// (ties to even, saturating). An in-image coordinate is copied straight from
// the source; anything else goes through EXTRAPOLATE, whose behaviour is
// fixed by the BORDER_* build option.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source image
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - destination image
//   map1ptr / map2ptr (+ step, offset)               - x / y coordinate maps
//   nVal                                             - border value (read by
//                                                      EXTRAPOLATE when
//                                                      BORDER_CONSTANT is set)
// Steps and offsets are applied to byte pointers, i.e. they are byte counts.
__kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                            __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                            __global const uchar * map1ptr, int map1_step, int map1_offset,
                            __global const uchar * map2ptr, int map2_step, int map2_offset,
                            ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    // Work-items to the right of the destination have nothing to do.
    if (x >= dst_cols)
        return;

    T scalar = convertScalar(nVal);

    int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(float), map1_offset));
    int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(float), map2_offset));
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

    #pragma unroll
    for (int i = 0; i < rowsPerWI; ++i, ++y,
         map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const float * xmap = (__global const float *)(map1ptr + map1_index);
        __global const float * ymap = (__global const float *)(map2ptr + map2_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        const int gx = convert_int_sat_rte(xmap[0]);
        const int gy = convert_int_sat_rte(ymap[0]);

        if (!NEED_EXTRAPOLATION(gx, gy))
        {
            // In-image coordinate: plain copy.
            int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
            storepix(loadpix((__global const T*)(srcptr + src_index)), dst);
        }
        else
        {
            // BORDER_CONSTANT's EXTRAPOLATE ignores the coordinate, so the
            // int2 is only materialised for the other border modes.
#ifndef BORDER_CONSTANT
            int2 gxy = (int2)(gx, gy);
#endif
            T v;
            EXTRAPOLATE(gxy, v)
            storepix(v, dst);
        }
    }
}
189
// Nearest-neighbour remap with the (x, y) source coordinate stored as one
// float2 per destination pixel.
//
// Each work-item handles column `x` of up to `rowsPerWI` consecutive
// destination rows. The coordinate is rounded to the nearest integer (ties
// to even, saturating); in-image coordinates are copied directly, all others
// are resolved by EXTRAPOLATE according to the BORDER_* build option.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source image
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - destination image
//   mapptr, map_step, map_offset                     - float2 coordinate map
//   nVal                                             - border value
// Steps and offsets are applied to byte pointers, i.e. they are byte counts.
__kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                          __global const uchar * mapptr, int map_step, int map_offset,
                          ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x >= dst_cols)
        return;

    T scalar = convertScalar(nVal);
    int map_index = mad24(y, map_step, mad24(x, (int)sizeof(float2), map_offset));
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

    #pragma unroll
    for (int i = 0; i < rowsPerWI; ++i, ++y,
         map_index += map_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const float2 * coord = (__global const float2 *)(mapptr + map_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        int2 gxy = convert_int2_sat_rte(coord[0]);
        const int gx = gxy.x;
        const int gy = gxy.y;

        if (!NEED_EXTRAPOLATION(gx, gy))
        {
            int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
            storepix(loadpix((__global const T *)(srcptr + src_index)), dst);
        }
        else
        {
            T v;
            EXTRAPOLATE(gxy, v)
            storepix(v, dst);
        }
    }
}
229
// Nearest-neighbour remap with the (x, y) source coordinate stored as one
// short2 (already integral) per destination pixel.
//
// Each work-item handles column `x` of up to `rowsPerWI` consecutive
// destination rows. In-image coordinates are copied directly; all others are
// resolved by EXTRAPOLATE according to the BORDER_* build option.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source image
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - destination image
//   mapptr, map_step, map_offset                     - short2 coordinate map
//   nVal                                             - border value
// Steps and offsets are applied to byte pointers, i.e. they are byte counts.
__kernel void remap_16SC2(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                          __global const uchar * mapptr, int map_step, int map_offset,
                          ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x >= dst_cols)
        return;

    T scalar = convertScalar(nVal);
    int map_index = mad24(y, map_step, mad24(x, (int)sizeof(short2), map_offset));
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

    #pragma unroll
    for (int i = 0; i < rowsPerWI; ++i, ++y,
         map_index += map_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const short2 * coord = (__global const short2 *)(mapptr + map_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        int2 gxy = convert_int2(coord[0]);
        const int gx = gxy.x;
        const int gy = gxy.y;

        if (!NEED_EXTRAPOLATION(gx, gy))
        {
            int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
            storepix(loadpix((__global const T *)(srcptr + src_index)), dst);
        }
        else
        {
            T v;
            EXTRAPOLATE(gxy, v)
            storepix(v, dst);
        }
    }
}
269
// Nearest-neighbour remap driven by a fixed-point map pair: map1 holds the
// integer (x, y) part as short2, map2 holds the packed fractional part as
// ushort (low INTER_BITS bits = x fraction, next INTER_BITS bits = y fraction).
//
// Each work-item handles column `x` of up to `rowsPerWI` consecutive
// destination rows. The integer coordinate is offset by (dx, dy) derived
// from the fractions, then either copied from the source or resolved by
// EXTRAPOLATE according to the BORDER_* build option.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source image
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - destination image
//   map1ptr / map2ptr (+ step, offset)               - integer / fraction maps
//   nVal                                             - border value
// Steps and offsets are applied to byte pointers, i.e. they are byte counts.
__kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                                __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                                __global const uchar * map1ptr, int map1_step, int map1_offset,
                                __global const uchar * map2ptr, int map2_step, int map2_offset,
                                ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x >= dst_cols)
        return;

    T scalar = convertScalar(nVal);
    int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(short2), map1_offset));
    int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(ushort), map2_offset));
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

    #pragma unroll
    for (int i = 0; i < rowsPerWI; ++i, ++y,
         map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const short2 * whole = (__global const short2 *)(map1ptr + map1_index);
        __global const ushort * fract = (__global const ushort *)(map2ptr + map2_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        // Split the packed fraction into its x and y components.
        const int packed = convert_int(fract[0]) & (INTER_TAB_SIZE2 - 1);
        const int half_step = INTER_TAB_SIZE >> 1;

        // The offset is 1 when the fraction is BELOW one half, 0 otherwise.
        // NOTE(review): presumably mirrors the host reference path - confirm
        // before "correcting" the comparison direction.
        int dx = 0;
        if ((packed & (INTER_TAB_SIZE - 1)) < half_step)
            dx = 1;
        int dy = 0;
        if ((packed >> INTER_BITS) < half_step)
            dy = 1;

        int2 gxy = convert_int2(whole[0]) + (int2)(dx, dy);
        const int gx = gxy.x;
        const int gy = gxy.y;

        if (!NEED_EXTRAPOLATION(gx, gy))
        {
            int src_index = mad24(gy, src_step, mad24(gx, TSIZE, src_offset));
            storepix(loadpix((__global const T *)(srcptr + src_index)), dst);
        }
        else
        {
            T v;
            EXTRAPOLATE(gxy, v)
            storepix(v, dst);
        }
    }
}
315
316#elif defined INTER_LINEAR
317
// Linear interpolation weights, stored as 32 consecutive pairs: pair k holds
// { 1 - k/32, k/32 }. Callers index it with (k << 1) and read two floats.
__constant float coeffs[64] =
{
    1.000000f, 0.000000f,
    0.968750f, 0.031250f,
    0.937500f, 0.062500f,
    0.906250f, 0.093750f,
    0.875000f, 0.125000f,
    0.843750f, 0.156250f,
    0.812500f, 0.187500f,
    0.781250f, 0.218750f,
    0.750000f, 0.250000f,
    0.718750f, 0.281250f,
    0.687500f, 0.312500f,
    0.656250f, 0.343750f,
    0.625000f, 0.375000f,
    0.593750f, 0.406250f,
    0.562500f, 0.437500f,
    0.531250f, 0.468750f,
    0.500000f, 0.500000f,
    0.468750f, 0.531250f,
    0.437500f, 0.562500f,
    0.406250f, 0.593750f,
    0.375000f, 0.625000f,
    0.343750f, 0.656250f,
    0.312500f, 0.687500f,
    0.281250f, 0.718750f,
    0.250000f, 0.750000f,
    0.218750f, 0.781250f,
    0.187500f, 0.812500f,
    0.156250f, 0.843750f,
    0.125000f, 0.875000f,
    0.093750f, 0.906250f,
    0.062500f, 0.937500f,
    0.031250f, 0.968750f
};
325
// Bilinear remap driven by a fixed-point map pair: map1 holds the integer
// (x, y) part as short2, map2 holds the packed fractional part as ushort
// (low INTER_BITS bits = x fraction, next INTER_BITS bits = y fraction).
//
// Each work-item handles column `x` of up to `rowsPerWI` consecutive
// destination rows. The four pixels surrounding the source position are
// fetched (through EXTRAPOLATE when outside the image), blended in the
// working type WT and converted back to T on store.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source image
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - destination image
//   map1ptr / map2ptr (+ step, offset)               - integer / fraction maps
//   nVal                                             - border value
// Steps and offsets are applied to byte pointers, i.e. they are byte counts.
__kernel void remap_16SC2_16UC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                                __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                                __global const uchar * map1ptr, int map1_step, int map1_offset,
                                __global const uchar * map2ptr, int map2_step, int map2_offset,
                                ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x >= dst_cols)
        return;

    WT scalar = convertToWT(convertScalar(nVal));
    int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(short2), map1_offset));
    int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(ushort), map2_offset));
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

    #pragma unroll
    for (int i = 0; i < rowsPerWI; ++i, ++y,
         map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const short2 * whole = (__global const short2 *)(map1ptr + map1_index);
        __global const ushort * fract = (__global const ushort *)(map2ptr + map2_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        // Top-left corner and its right / lower / diagonal neighbours. All
        // four are derived up front because EXTRAPOLATE may rewrite them.
        int2 p00 = convert_int2(whole[0]);
        int2 p10 = (int2)(p00.x + 1, p00.y);
        int2 p01 = (int2)(p00.x, p00.y + 1);
        int2 p11 = (int2)(p00.x + 1, p00.y + 1);

        // Fractional offsets as multiples of 1 / INTER_TAB_SIZE.
        ushort packed = (ushort)(fract[0] & (INTER_TAB_SIZE2 - 1));
        WT2 u = (WT2)(packed & (INTER_TAB_SIZE - 1), packed >> INTER_BITS) / (WT2)(INTER_TAB_SIZE);

        WT v00 = scalar, v10 = scalar, v01 = scalar, v11 = scalar;

        if (NEED_EXTRAPOLATION(p00.x, p00.y))
        {
            EXTRAPOLATE(p00, v00);
        }
        else
        {
            v00 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p00.y, src_step, p00.x * TSIZE + src_offset))));
        }

        if (NEED_EXTRAPOLATION(p10.x, p10.y))
        {
            EXTRAPOLATE(p10, v10);
        }
        else
        {
            v10 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p10.y, src_step, p10.x * TSIZE + src_offset))));
        }

        if (NEED_EXTRAPOLATION(p01.x, p01.y))
        {
            EXTRAPOLATE(p01, v01);
        }
        else
        {
            v01 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p01.y, src_step, p01.x * TSIZE + src_offset))));
        }

        if (NEED_EXTRAPOLATION(p11.x, p11.y))
        {
            EXTRAPOLATE(p11, v11);
        }
        else
        {
            v11 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p11.y, src_step, p11.x * TSIZE + src_offset))));
        }

        // Bilinear blend of the four corners.
        WT dst_data = v00 * (1 - u.x) * (1 - u.y) +
                      v10 * (u.x)     * (1 - u.y) +
                      v01 * (1 - u.x) * (u.y) +
                      v11 * (u.x)     * (u.y);
        storepix(convertToT(dst_data), dst);
    }
}
389
// Bilinear remap with the x and y source coordinates stored in two separate
// single-channel float maps.
//
// Each work-item handles column `x` of up to `rowsPerWI` consecutive
// destination rows. Two implementations are selected at build time:
//   * BORDER_CONSTANT: the coordinate is quantised to 1/INTER_TAB_SIZE steps
//     and the 2x2 neighbourhood is blended with weights from `coeffs`;
//     taps outside the image contribute the border value.
//   * any other border mode: the four neighbours are fetched (through
//     EXTRAPOLATE when outside the image) and blended explicitly.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source image
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - destination image
//   map1ptr / map2ptr (+ step, offset)               - x / y coordinate maps
//   nVal                                             - border value
// Steps and offsets are applied to byte pointers, i.e. they are byte counts.
__kernel void remap_2_32FC1(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                            __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                            __global const uchar * map1ptr, int map1_step, int map1_offset,
                            __global const uchar * map2ptr, int map2_step, int map2_offset,
                            ST nVal)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        // Border value in the working type; used by both border paths below.
        WT scalar = convertToWT(convertScalar(nVal));
        int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
        int map1_index = mad24(y, map1_step, mad24(x, (int)sizeof(float), map1_offset));
        int map2_index = mad24(y, map2_step, mad24(x, (int)sizeof(float), map2_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i, ++y,
            map1_index += map1_step, map2_index += map2_step, dst_index += dst_step)
            if (y < dst_rows)
            {
                __global const float * map1 = (__global const float *)(map1ptr + map1_index);
                __global const float * map2 = (__global const float *)(map2ptr + map2_index);
                __global T * dst = (__global T *)(dstptr + dst_index);

#if defined BORDER_CONSTANT
                float xf = map1[0], yf = map2[0];

                // Quantise each coordinate ONCE and take both the integer pixel
                // and the weight-table index from that same fixed-point value.
                // Deriving them from two differently rounded conversions lets
                // them disagree for negative coordinates (e.g. x*32 = -0.7
                // would give pixel 0 with table index 31, almost a full pixel
                // away from the requested position).
                int ix = convert_int_sat_rte(xf * INTER_TAB_SIZE);
                int iy = convert_int_sat_rte(yf * INTER_TAB_SIZE);
                int sx = ix >> INTER_BITS;
                int sy = iy >> INTER_BITS;

                __constant float * coeffs_x = coeffs + ((ix & (INTER_TAB_SIZE - 1)) << 1);
                __constant float * coeffs_y = coeffs + ((iy & (INTER_TAB_SIZE - 1)) << 1);

                WT sum = (WT)(0), xsum;
                int src_index = mad24(sy, src_step, mad24(sx, TSIZE, src_offset));

                // Two source rows: sy and sy + 1.
                #pragma unroll
                for (int yp = 0; yp < 2; ++yp, src_index += src_step)
                {
                    if (sy + yp >= 0 && sy + yp < src_rows)
                    {
                        xsum = (WT)(0);
                        // Unchecked path when both horizontal taps are inside the
                        // row. The test is conservative: sx + 1 == src_cols - 1
                        // is also sent to the checked path below.
                        if (sx >= 0 && sx + 2 < src_cols)
                        {
#if depth == 0 && cn == 1
                            uchar2 value = vload2(0, srcptr + src_index);
                            xsum = dot(convert_float2(value), (float2)(coeffs_x[0], coeffs_x[1]));
#else
                            #pragma unroll
                            for (int xp = 0; xp < 2; ++xp)
                                xsum = fma(convertToWT(loadpix(srcptr + mad24(xp, TSIZE, src_index))), coeffs_x[xp], xsum);
#endif
                        }
                        else
                        {
                            // Per-tap check; taps outside the row use the border value.
                            #pragma unroll
                            for (int xp = 0; xp < 2; ++xp)
                                xsum = fma(sx + xp >= 0 && sx + xp < src_cols ?
                                           convertToWT(loadpix(srcptr + mad24(xp, TSIZE, src_index))) : scalar, coeffs_x[xp], xsum);
                        }
                        sum = fma(xsum, coeffs_y[yp], sum);
                    }
                    else
                        // Whole row is outside the image: its weights sum to 1,
                        // so it contributes the border value.
                        sum = fma(scalar, coeffs_y[yp], sum);
                }

                storepix(convertToT(sum), dst);
#else
                float2 map_data = (float2)(map1[0], map2[0]);

                // Top-left neighbour (floor of the coordinate) and the other
                // three corners. All four are derived up front because
                // EXTRAPOLATE may rewrite them.
                int2 map_dataA = convert_int2_sat_rtn(map_data);
                int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
                int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
                int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);

                // Fractional offset, quantised to multiples of 1 / INTER_TAB_SIZE.
                float2 _u = map_data - convert_float2(map_dataA);
                WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
                WT a = scalar, b = scalar, c = scalar, d = scalar;

                if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
                    a = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataA.y, src_step, map_dataA.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataA, a);

                if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
                    b = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataB.y, src_step, map_dataB.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataB, b);

                if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
                    c = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataC.y, src_step, map_dataC.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataC, c);

                if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
                    d = convertToWT(loadpix((__global const T *)(srcptr + mad24(map_dataD.y, src_step, map_dataD.x * TSIZE + src_offset))));
                else
                    EXTRAPOLATE(map_dataD, d);

                // Bilinear blend of the four corners.
                WT dst_data = a * (1 - u.x) * (1 - u.y) +
                              b * (u.x)     * (1 - u.y) +
                              c * (1 - u.x) * (u.y) +
                              d * (u.x)     * (u.y);
                storepix(convertToT(dst_data), dst);
#endif
            }
    }
}
499
// Bilinear remap with the (x, y) source coordinate stored as one float2 per
// destination pixel.
//
// Each work-item handles column `x` of up to `rowsPerWI` consecutive
// destination rows. The four pixels surrounding the source position are
// fetched (through EXTRAPOLATE when outside the image), blended in the
// working type WT and converted back to T on store.
//
//   srcptr, src_step, src_offset, src_rows, src_cols - source image
//   dstptr, dst_step, dst_offset, dst_rows, dst_cols - destination image
//   mapptr, map_step, map_offset                     - float2 coordinate map
//   nVal                                             - border value
// Steps and offsets are applied to byte pointers, i.e. they are byte counts.
__kernel void remap_32FC2(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                          __global const uchar * mapptr, int map_step, int map_offset,
                          ST nVal)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x >= dst_cols)
        return;

    WT scalar = convertToWT(convertScalar(nVal));
    int map_index = mad24(y, map_step, mad24(x, (int)sizeof(float2), map_offset));
    int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));

    #pragma unroll
    for (int i = 0; i < rowsPerWI; ++i, ++y,
         map_index += map_step, dst_index += dst_step)
    {
        if (y >= dst_rows)
            continue;

        __global const float2 * coord = (__global const float2 *)(mapptr + map_index);
        __global T * dst = (__global T *)(dstptr + dst_index);

        float2 map_data = coord[0];

        // Top-left neighbour (floor of the coordinate) and the other three
        // corners. All four are derived up front because EXTRAPOLATE may
        // rewrite them.
        int2 p00 = convert_int2_sat_rtn(map_data);
        int2 p10 = (int2)(p00.x + 1, p00.y);
        int2 p01 = (int2)(p00.x, p00.y + 1);
        int2 p11 = (int2)(p00.x + 1, p00.y + 1);

        // Fractional offset, quantised to multiples of 1 / INTER_TAB_SIZE.
        float2 _u = map_data - convert_float2(p00);
        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;

        WT v00 = scalar, v10 = scalar, v01 = scalar, v11 = scalar;

        if (NEED_EXTRAPOLATION(p00.x, p00.y))
        {
            EXTRAPOLATE(p00, v00);
        }
        else
        {
            v00 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p00.y, src_step, p00.x * TSIZE + src_offset))));
        }

        if (NEED_EXTRAPOLATION(p10.x, p10.y))
        {
            EXTRAPOLATE(p10, v10);
        }
        else
        {
            v10 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p10.y, src_step, p10.x * TSIZE + src_offset))));
        }

        if (NEED_EXTRAPOLATION(p01.x, p01.y))
        {
            EXTRAPOLATE(p01, v01);
        }
        else
        {
            v01 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p01.y, src_step, p01.x * TSIZE + src_offset))));
        }

        if (NEED_EXTRAPOLATION(p11.x, p11.y))
        {
            EXTRAPOLATE(p11, v11);
        }
        else
        {
            v11 = convertToWT(loadpix((__global const T *)(srcptr + mad24(p11.y, src_step, p11.x * TSIZE + src_offset))));
        }

        // Bilinear blend of the four corners.
        WT dst_data = v00 * (1 - u.x) * (1 - u.y) +
                      v10 * (u.x)     * (1 - u.y) +
                      v01 * (1 - u.x) * (u.y) +
                      v11 * (u.x)     * (u.y);
        storepix(convertToT(dst_data), dst);
    }
}
560
561#endif
562