1 /*
2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 
27 /*
28  * FUNCTION
29  *      Image affine transformation with Bicubic filtering
30  * SYNOPSIS
31  *      mlib_status mlib_ImageAffine_[u8|s16|u16]_?ch_bc(mlib_s32 *leftEdges,
32  *                                                       mlib_s32 *rightEdges,
33  *                                                       mlib_s32 *xStarts,
34  *                                                       mlib_s32 *yStarts,
35  *                                                       mlib_s32 *sides,
36  *                                                       mlib_u8  *dstData,
37  *                                                       mlib_u8  **lineAddr,
38  *                                                       mlib_s32 dstYStride,
39  *                                                       mlib_s32 is_affine,
40  *                                                       mlib_s32 srcYStride,
41  *                                                       mlib_filter filter)
42  *
43  * ARGUMENTS
44  *      leftEdges  array[dstHeight] of xLeft coordinates
45  *      rightEdges array[dstHeight] of xRight coordinates
46  *      xStarts    array[dstHeight] of xStart * 65536 coordinates
47  *      yStarts    array[dstHeight] of yStart * 65536 coordinates
48  *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
49  *                 sides[2] is dx * 65536, sides[3] is dy * 65536
50  *      dstData    pointer to the first pixel on (yStart - 1) line
51  *      lineAddr   array[srcHeight] of pointers to the first pixel on
52  *                 the corresponding lines
53  *      dstYStride stride of destination image
54  *      is_affine  indicator (Affine - GridWarp)
55  *      srcYStride stride of source image
56  *      filter     type of resampling filter
57  *
58  * DESCRIPTION
59  *      The functions step along the lines from xLeft to xRight and apply
60  *      the bicubic filtering.
61  *
62  */
63 
64 #include "mlib_ImageAffine.h"
65 
/* Sample type processed by the kernels in this file. */
#define DTYPE  mlib_u16

/* Expands to the exported function name for a channel-count token. */
#define FUN_NAME(CHAN) mlib_ImageAffine_u16_##CHAN##_bc

/* NOTE(review): presumably consumed by FILTER_SHIFT / FILTER_MASK in
 * mlib_ImageAffine.h -- confirm against the header. */
#define FILTER_BITS   9

/***************************************************************/
/* for x86, using integer multiplies is faster */

/* Right shift applied to each horizontal 4-tap sum; no rounding term. */
#define SHIFT_X  15
#define ROUND_X  0 /* (1 << (SHIFT_X - 1)) */

/* Right shift and half-unit rounding term applied to the vertical sum. */
#define SHIFT_Y  14
#define ROUND_Y  (1 << (SHIFT_Y - 1))

/*
 * Assigns to DST the value of the local variable `val0` (which must be in
 * scope at the point of use) clamped to [MLIB_U16_MIN, MLIB_U16_MAX].
 */
#define S32_TO_U16_SAT(DST)                                     \
  DST = (val0 >= MLIB_U16_MAX) ? MLIB_U16_MAX :                 \
        (val0 <= MLIB_U16_MIN) ? MLIB_U16_MIN : (mlib_u16)val0
88 
89 /***************************************************************/
90 mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
91 {
92   DECLAREVAR_BC();
93   DTYPE *dstLineEnd;
94   const mlib_s16 *mlib_filters_table;
95 
96   if (filter == MLIB_BICUBIC) {
97     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
98   }
99   else {
100     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
101   }
102 
103   for (j = yStart; j <= yFinish; j++) {
104     mlib_s32 xf0, xf1, xf2, xf3;
105     mlib_s32 yf0, yf1, yf2, yf3;
106     mlib_s32 c0, c1, c2, c3, val0;
107     mlib_s32 filterpos;
108     mlib_s16 *fptr;
109     mlib_s32 s0, s1, s2, s3;
110     mlib_s32 s4, s5, s6, s7;
111 
112     CLIP(1);
113     dstLineEnd = (DTYPE *) dstData + xRight;
114 
115     filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
116     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
117 
118     xf0 = fptr[0] >> 1;
119     xf1 = fptr[1] >> 1;
120     xf2 = fptr[2] >> 1;
121     xf3 = fptr[3] >> 1;
122 
123     filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
124     fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
125 
126     yf0 = fptr[0];
127     yf1 = fptr[1];
128     yf2 = fptr[2];
129     yf3 = fptr[3];
130 
131     xSrc = (X >> MLIB_SHIFT) - 1;
132     ySrc = (Y >> MLIB_SHIFT) - 1;
133 
134     srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
135     s0 = srcPixelPtr[0];
136     s1 = srcPixelPtr[1];
137     s2 = srcPixelPtr[2];
138     s3 = srcPixelPtr[3];
139 
140     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
141     s4 = srcPixelPtr[0];
142     s5 = srcPixelPtr[1];
143     s6 = srcPixelPtr[2];
144     s7 = srcPixelPtr[3];
145 
146     for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
147 
148       X += dX;
149       Y += dY;
150 
151       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
152       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
153       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
154       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
155             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
156       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
157       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
158             srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
159 
160       filterpos = (X >> FILTER_SHIFT) & FILTER_MASK;
161       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
162 
163       xf0 = fptr[0] >> 1;
164       xf1 = fptr[1] >> 1;
165       xf2 = fptr[2] >> 1;
166       xf3 = fptr[3] >> 1;
167 
168       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
169 
170       filterpos = (Y >> FILTER_SHIFT) & FILTER_MASK;
171       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
172 
173       yf0 = fptr[0];
174       yf1 = fptr[1];
175       yf2 = fptr[2];
176       yf3 = fptr[3];
177 
178       S32_TO_U16_SAT(dstPixelPtr[0]);
179 
180       xSrc = (X >> MLIB_SHIFT) - 1;
181       ySrc = (Y >> MLIB_SHIFT) - 1;
182 
183       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
184       s0 = srcPixelPtr[0];
185       s1 = srcPixelPtr[1];
186       s2 = srcPixelPtr[2];
187       s3 = srcPixelPtr[3];
188 
189       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
190       s4 = srcPixelPtr[0];
191       s5 = srcPixelPtr[1];
192       s6 = srcPixelPtr[2];
193       s7 = srcPixelPtr[3];
194     }
195 
196     c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
197     c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
198     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
199     c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
200           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
201     srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
202     c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
203           srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3 + ROUND_X) >> SHIFT_X;
204 
205     val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
206     S32_TO_U16_SAT(dstPixelPtr[0]);
207   }
208 
209   return MLIB_SUCCESS;
210 }
211 
212 /***************************************************************/
213 mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
214 {
215   DECLAREVAR_BC();
216   DTYPE *dstLineEnd;
217   const mlib_s16 *mlib_filters_table;
218 
219   if (filter == MLIB_BICUBIC) {
220     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
221   }
222   else {
223     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
224   }
225 
226   for (j = yStart; j <= yFinish; j++) {
227     mlib_s32 xf0, xf1, xf2, xf3;
228     mlib_s32 yf0, yf1, yf2, yf3;
229     mlib_s32 c0, c1, c2, c3, val0;
230     mlib_s32 filterpos, k;
231     mlib_s16 *fptr;
232     mlib_s32 s0, s1, s2, s3;
233     mlib_s32 s4, s5, s6, s7;
234 
235     CLIP(2);
236     dstLineEnd = (DTYPE *) dstData + 2 * xRight;
237 
238     for (k = 0; k < 2; k++) {
239       mlib_s32 X1 = X;
240       mlib_s32 Y1 = Y;
241       DTYPE *dPtr = dstPixelPtr + k;
242 
243       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
244       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
245 
246       xf0 = fptr[0] >> 1;
247       xf1 = fptr[1] >> 1;
248       xf2 = fptr[2] >> 1;
249       xf3 = fptr[3] >> 1;
250 
251       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
252       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
253 
254       yf0 = fptr[0];
255       yf1 = fptr[1];
256       yf2 = fptr[2];
257       yf3 = fptr[3];
258 
259       xSrc = (X1 >> MLIB_SHIFT) - 1;
260       ySrc = (Y1 >> MLIB_SHIFT) - 1;
261 
262       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
263       s0 = srcPixelPtr[0];
264       s1 = srcPixelPtr[2];
265       s2 = srcPixelPtr[4];
266       s3 = srcPixelPtr[6];
267 
268       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
269       s4 = srcPixelPtr[0];
270       s5 = srcPixelPtr[2];
271       s6 = srcPixelPtr[4];
272       s7 = srcPixelPtr[6];
273 
274       for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
275 
276         X1 += dX;
277         Y1 += dY;
278 
279         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
280         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
281         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
282         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
283               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
284         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
285         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
286               srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
287 
288         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
289         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
290 
291         xf0 = fptr[0] >> 1;
292         xf1 = fptr[1] >> 1;
293         xf2 = fptr[2] >> 1;
294         xf3 = fptr[3] >> 1;
295 
296         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
297 
298         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
299         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
300 
301         yf0 = fptr[0];
302         yf1 = fptr[1];
303         yf2 = fptr[2];
304         yf3 = fptr[3];
305 
306         S32_TO_U16_SAT(dPtr[0]);
307 
308         xSrc = (X1 >> MLIB_SHIFT) - 1;
309         ySrc = (Y1 >> MLIB_SHIFT) - 1;
310 
311         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
312         s0 = srcPixelPtr[0];
313         s1 = srcPixelPtr[2];
314         s2 = srcPixelPtr[4];
315         s3 = srcPixelPtr[6];
316 
317         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
318         s4 = srcPixelPtr[0];
319         s5 = srcPixelPtr[2];
320         s6 = srcPixelPtr[4];
321         s7 = srcPixelPtr[6];
322       }
323 
324       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
325       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
326       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
327       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
328             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
329       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
330       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
331             srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3 + ROUND_X) >> SHIFT_X;
332 
333       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
334       S32_TO_U16_SAT(dPtr[0]);
335     }
336   }
337 
338   return MLIB_SUCCESS;
339 }
340 
341 /***************************************************************/
342 mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
343 {
344   DECLAREVAR_BC();
345   DTYPE *dstLineEnd;
346   const mlib_s16 *mlib_filters_table;
347 
348   if (filter == MLIB_BICUBIC) {
349     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
350   }
351   else {
352     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
353   }
354 
355   for (j = yStart; j <= yFinish; j++) {
356     mlib_s32 xf0, xf1, xf2, xf3;
357     mlib_s32 yf0, yf1, yf2, yf3;
358     mlib_s32 c0, c1, c2, c3, val0;
359     mlib_s32 filterpos, k;
360     mlib_s16 *fptr;
361     mlib_s32 s0, s1, s2, s3;
362     mlib_s32 s4, s5, s6, s7;
363 
364     CLIP(3);
365     dstLineEnd = (DTYPE *) dstData + 3 * xRight;
366 
367     for (k = 0; k < 3; k++) {
368       mlib_s32 X1 = X;
369       mlib_s32 Y1 = Y;
370       DTYPE *dPtr = dstPixelPtr + k;
371 
372       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
373       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
374 
375       xf0 = fptr[0] >> 1;
376       xf1 = fptr[1] >> 1;
377       xf2 = fptr[2] >> 1;
378       xf3 = fptr[3] >> 1;
379 
380       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
381       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
382 
383       yf0 = fptr[0];
384       yf1 = fptr[1];
385       yf2 = fptr[2];
386       yf3 = fptr[3];
387 
388       xSrc = (X1 >> MLIB_SHIFT) - 1;
389       ySrc = (Y1 >> MLIB_SHIFT) - 1;
390 
391       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
392       s0 = srcPixelPtr[0];
393       s1 = srcPixelPtr[3];
394       s2 = srcPixelPtr[6];
395       s3 = srcPixelPtr[9];
396 
397       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
398       s4 = srcPixelPtr[0];
399       s5 = srcPixelPtr[3];
400       s6 = srcPixelPtr[6];
401       s7 = srcPixelPtr[9];
402 
403       for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
404 
405         X1 += dX;
406         Y1 += dY;
407 
408         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
409         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
410         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
411         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
412               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
413         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
414         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
415               srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
416 
417         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
418         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
419 
420         xf0 = fptr[0] >> 1;
421         xf1 = fptr[1] >> 1;
422         xf2 = fptr[2] >> 1;
423         xf3 = fptr[3] >> 1;
424 
425         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
426 
427         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
428         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
429 
430         yf0 = fptr[0];
431         yf1 = fptr[1];
432         yf2 = fptr[2];
433         yf3 = fptr[3];
434 
435         S32_TO_U16_SAT(dPtr[0]);
436 
437         xSrc = (X1 >> MLIB_SHIFT) - 1;
438         ySrc = (Y1 >> MLIB_SHIFT) - 1;
439 
440         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
441         s0 = srcPixelPtr[0];
442         s1 = srcPixelPtr[3];
443         s2 = srcPixelPtr[6];
444         s3 = srcPixelPtr[9];
445 
446         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
447         s4 = srcPixelPtr[0];
448         s5 = srcPixelPtr[3];
449         s6 = srcPixelPtr[6];
450         s7 = srcPixelPtr[9];
451       }
452 
453       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
454       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
455       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
456       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
457             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
458       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
459       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
460             srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3 + ROUND_X) >> SHIFT_X;
461 
462       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
463       S32_TO_U16_SAT(dPtr[0]);
464     }
465   }
466 
467   return MLIB_SUCCESS;
468 }
469 
470 /***************************************************************/
471 mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
472 {
473   DECLAREVAR_BC();
474   DTYPE *dstLineEnd;
475   const mlib_s16 *mlib_filters_table;
476 
477   if (filter == MLIB_BICUBIC) {
478     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc;
479   }
480   else {
481     mlib_filters_table = (mlib_s16 *) mlib_filters_s16_bc2;
482   }
483 
484   for (j = yStart; j <= yFinish; j++) {
485     mlib_s32 xf0, xf1, xf2, xf3;
486     mlib_s32 yf0, yf1, yf2, yf3;
487     mlib_s32 c0, c1, c2, c3, val0;
488     mlib_s32 filterpos, k;
489     mlib_s16 *fptr;
490     mlib_s32 s0, s1, s2, s3;
491     mlib_s32 s4, s5, s6, s7;
492 
493     CLIP(4);
494     dstLineEnd = (DTYPE *) dstData + 4 * xRight;
495 
496     for (k = 0; k < 4; k++) {
497       mlib_s32 X1 = X;
498       mlib_s32 Y1 = Y;
499       DTYPE *dPtr = dstPixelPtr + k;
500 
501       filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
502       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
503 
504       xf0 = fptr[0] >> 1;
505       xf1 = fptr[1] >> 1;
506       xf2 = fptr[2] >> 1;
507       xf3 = fptr[3] >> 1;
508 
509       filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
510       fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
511 
512       yf0 = fptr[0];
513       yf1 = fptr[1];
514       yf2 = fptr[2];
515       yf3 = fptr[3];
516 
517       xSrc = (X1 >> MLIB_SHIFT) - 1;
518       ySrc = (Y1 >> MLIB_SHIFT) - 1;
519 
520       srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
521       s0 = srcPixelPtr[0];
522       s1 = srcPixelPtr[4];
523       s2 = srcPixelPtr[8];
524       s3 = srcPixelPtr[12];
525 
526       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
527       s4 = srcPixelPtr[0];
528       s5 = srcPixelPtr[4];
529       s6 = srcPixelPtr[8];
530       s7 = srcPixelPtr[12];
531 
532       for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {
533 
534         X1 += dX;
535         Y1 += dY;
536 
537         c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
538         c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
539         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
540         c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
541               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
542         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
543         c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
544               srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
545 
546         filterpos = (X1 >> FILTER_SHIFT) & FILTER_MASK;
547         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
548 
549         xf0 = fptr[0] >> 1;
550         xf1 = fptr[1] >> 1;
551         xf2 = fptr[2] >> 1;
552         xf3 = fptr[3] >> 1;
553 
554         val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
555 
556         filterpos = (Y1 >> FILTER_SHIFT) & FILTER_MASK;
557         fptr = (mlib_s16 *) ((mlib_u8 *) mlib_filters_table + filterpos);
558 
559         yf0 = fptr[0];
560         yf1 = fptr[1];
561         yf2 = fptr[2];
562         yf3 = fptr[3];
563 
564         S32_TO_U16_SAT(dPtr[0]);
565 
566         xSrc = (X1 >> MLIB_SHIFT) - 1;
567         ySrc = (Y1 >> MLIB_SHIFT) - 1;
568 
569         srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
570         s0 = srcPixelPtr[0];
571         s1 = srcPixelPtr[4];
572         s2 = srcPixelPtr[8];
573         s3 = srcPixelPtr[12];
574 
575         srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
576         s4 = srcPixelPtr[0];
577         s5 = srcPixelPtr[4];
578         s6 = srcPixelPtr[8];
579         s7 = srcPixelPtr[12];
580       }
581 
582       c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3 + ROUND_X) >> SHIFT_X;
583       c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3 + ROUND_X) >> SHIFT_X;
584       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
585       c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
586             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
587       srcPixelPtr = (DTYPE *) ((mlib_addr) srcPixelPtr + srcYStride);
588       c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
589             srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3 + ROUND_X) >> SHIFT_X;
590 
591       val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3 + ROUND_Y) >> SHIFT_Y;
592       S32_TO_U16_SAT(dPtr[0]);
593     }
594   }
595 
596   return MLIB_SUCCESS;
597 }
598 
599 /***************************************************************/
600