1 /*
2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 
27 /*
28  * FUNCTION
29  *      Image affine transformation with Bicubic filtering
30  * SYNOPSIS
31  *      mlib_status mlib_ImageAffine_[s32|f32|d64]_?ch_bc
32  *                                  (mlib_s32 *leftEdges,
33  *                                   mlib_s32 *rightEdges,
34  *                                   mlib_s32 *xStarts,
35  *                                   mlib_s32 *yStarts,
36  *                                   mlib_s32 *sides,
37  *                                   mlib_u8  *dstData,
38  *                                   mlib_u8  **lineAddr,
39  *                                   mlib_s32 dstYStride,
40  *                                   mlib_s32 is_affine,
41  *                                   mlib_s32 srcYStride,
42  *                                   mlib_filter filter)
43  *
44  * ARGUMENTS
45  *      leftEdges  array[dstHeight] of xLeft coordinates
 *      rightEdges array[dstHeight] of xRight coordinates
47  *      xStarts    array[dstHeight] of xStart * 65536 coordinates
48  *      yStarts    array[dstHeight] of yStart * 65536 coordinates
49  *      sides      output array[4]. sides[0] is yStart, sides[1] is yFinish,
50  *                 sides[2] is dx * 65536, sides[3] is dy * 65536
51  *      dstData    pointer to the first pixel on (yStart - 1) line
52  *      lineAddr   array[srcHeight] of pointers to the first pixel on
53  *                 the corresponding lines
54  *      dstYStride stride of destination image
55  *      is_affine  indicator (Affine - GridWarp)
56  *      srcYStride stride of source image
57  *      filter     type of resampling filter
58  *
59  * DESCRIPTION
60  *      The functions step along the lines from xLeft to xRight and apply
61  *      the Bicubic and Bicubic2 filtering.
62  *
63  */
64 
65 #include "mlib_ImageAffine.h"
66 
/*
 * Pixel-type selector for this translation unit. The value picks one of the
 * three configurations below; it is fixed at 3 here, so only the mlib_s32
 * branch is active in this file.
 */
#define IMG_TYPE  3

/***************************************************************/
/*
 * Per-type configuration:
 *   DTYPE    - element type of the source and destination pixels
 *   FTYPE    - floating-point type used for the filter weights and sums
 *   FUN_NAME - builds the function name from a channel-count token
 *   STORE    - writes a computed value into a destination pixel
 */
#if IMG_TYPE == 3

#define DTYPE  mlib_s32
#define FTYPE  mlib_d64

#define FUN_NAME(CHAN) mlib_ImageAffine_s32_##CHAN##_bc

/*
 * The second argument is ignored here: only the destination is forwarded to
 * SAT32, which is defined outside this file.
 * NOTE(review): presumably SAT32 saturates the caller's local `val0` into
 * `res` (every STORE call in this file passes val0) - confirm against the
 * SAT32 definition.
 */
#define STORE(res, x) SAT32(res)

#elif IMG_TYPE == 4

#define DTYPE  mlib_f32
#define FTYPE  DTYPE

#define FUN_NAME(CHAN) mlib_ImageAffine_f32_##CHAN##_bc

/* Floating-point pixels are stored by plain assignment. */
#define STORE(res, x) res = (x)

#elif IMG_TYPE == 5

#define DTYPE  mlib_d64
#define FTYPE  DTYPE

#define FUN_NAME(CHAN) mlib_ImageAffine_d64_##CHAN##_bc

/* Floating-point pixels are stored by plain assignment. */
#define STORE(res, x) res = (x)

#endif
98 
99 /***************************************************************/
/*
 * CREATE_COEF_BICUBIC(X, Y, OPERATOR)
 *
 * Computes the four horizontal (xf0..xf3) and four vertical (yf0..yf3)
 * filter weights for the MLIB_BICUBIC filter from the fractional parts of
 * the fixed-point coordinates X and Y.
 *
 * With t = (coordinate & MLIB_MASK) * scale the weights are
 *     f0 =       t^2 - 0.5*t^3 - 0.5*t
 *     f1 = 1.5*t^3 - 2.5*t^2 + 1
 *     f2 =   2*t^2 - 1.5*t^3 + 0.5*t
 *     f3 = 0.5*t^3 - 0.5*t^2
 * i.e. the cubic convolution kernel with a = -0.5.
 *
 * Arguments:
 *   X, Y     - integer fixed-point coordinates; each is evaluated once.
 *              They are parenthesized in the expansion so that an argument
 *              containing an operator of lower precedence than '&' (for
 *              example a conditional expression) is masked as a whole.
 *   OPERATOR - a statement (may be empty) executed after xf0..xf3 have been
 *              updated but before yf0..yf3 are overwritten, so it can still
 *              use the previous vertical weights.
 *
 * Requirements on the caller's scope: FTYPE variables dx, dy, dx_2, dy_2,
 * dx2, dy2, dx3_2, dy3_2, dx3_3, dy3_3, xf0..xf3, yf0..yf3 and `scale`.
 *
 * The expansion is a sequence of statements without a trailing semicolon;
 * use it only as a complete statement inside a braced block.
 */
#define CREATE_COEF_BICUBIC( X, Y, OPERATOR )                   \
  dx = ((X) & MLIB_MASK) * scale;                               \
  dy = ((Y) & MLIB_MASK) * scale;                               \
  dx_2  = ((FTYPE)0.5)  * dx;                                   \
  dy_2  = ((FTYPE)0.5)  * dy;                                   \
  dx2   = dx   * dx;    dy2   = dy   * dy;                      \
  dx3_2 = dx_2 * dx2;   dy3_2 = dy_2 * dy2;                     \
  dx3_3 = ((FTYPE)3.0)  * dx3_2;                                \
  dy3_3 = ((FTYPE)3.0)  * dy3_2;                                \
                                                                \
  xf0 = dx2 - dx3_2 - dx_2;                                     \
  xf1 = dx3_3 - ((FTYPE)2.5) * dx2 + ((FTYPE)1.0);              \
  xf2 = ((FTYPE)2.0) * dx2 - dx3_3 + dx_2;                      \
  xf3 = dx3_2 - ((FTYPE)0.5) * dx2;                             \
                                                                \
  OPERATOR;                                                     \
                                                                \
  yf0 = dy2 - dy3_2 - dy_2;                                     \
  yf1 = dy3_3 - ((FTYPE)2.5) * dy2 + ((FTYPE)1.0);              \
  yf2 = ((FTYPE)2.0) * dy2 - dy3_3 + dy_2;                      \
  yf3 = dy3_2 - ((FTYPE)0.5) * dy2
121 
122 /***************************************************************/
/*
 * CREATE_COEF_BICUBIC_2(X, Y, OPERATOR)
 *
 * Computes the four horizontal (xf0..xf3) and four vertical (yf0..yf3)
 * filter weights for the second bicubic variant (used when the filter is
 * not MLIB_BICUBIC) from the fractional parts of the fixed-point
 * coordinates X and Y.
 *
 * With t = (coordinate & MLIB_MASK) * scale the weights are
 *     f0 = 2*t^2 - t^3 - t
 *     f1 =   t^3 - 2*t^2 + 1
 *     f2 =   t^2 - t^3 + t
 *     f3 =   t^3 - t^2
 * i.e. the cubic convolution kernel with a = -1.
 *
 * Note on the temporaries: despite their names, dx3_2/dy3_2 hold t^3 and
 * dx3_3/dy3_3 hold 2*t^2 in this macro.
 *
 * Arguments:
 *   X, Y     - integer fixed-point coordinates; each is evaluated once.
 *              They are parenthesized in the expansion so that an argument
 *              containing an operator of lower precedence than '&' (for
 *              example a conditional expression) is masked as a whole.
 *   OPERATOR - a statement (may be empty) executed after xf0..xf3 have been
 *              updated but before yf0..yf3 are overwritten, so it can still
 *              use the previous vertical weights.
 *
 * Requirements on the caller's scope: FTYPE variables dx, dy, dx2, dy2,
 * dx3_2, dy3_2, dx3_3, dy3_3, xf0..xf3, yf0..yf3 and `scale`.
 *
 * The expansion is a sequence of statements without a trailing semicolon;
 * use it only as a complete statement inside a braced block.
 */
#define CREATE_COEF_BICUBIC_2( X, Y, OPERATOR )                 \
  dx = ((X) & MLIB_MASK) * scale;                               \
  dy = ((Y) & MLIB_MASK) * scale;                               \
  dx2   = dx  * dx;    dy2   = dy  * dy;                        \
  dx3_2 = dx  * dx2;   dy3_2 = dy  * dy2;                       \
  dx3_3 = ((FTYPE)2.0) * dx2;                                   \
  dy3_3 = ((FTYPE)2.0) * dy2;                                   \
                                                                \
  xf0 = dx3_3 - dx3_2 - dx;                                     \
  xf1 = dx3_2 - dx3_3 + ((FTYPE)1.0);                           \
  xf2 = dx2   - dx3_2   + dx;                                   \
  xf3 = dx3_2 - dx2;                                            \
                                                                \
  OPERATOR;                                                     \
                                                                \
  yf0 = dy3_3 - dy3_2 - dy;                                     \
  yf1 = dy3_2 - dy3_3 + ((FTYPE)1.0);                           \
  yf2 = dy2   - dy3_2   + dy;                                   \
  yf3 = dy3_2 - dy2
142 
143 /***************************************************************/
/*
 * Bicubic affine resampling, 1 channel per pixel.
 *
 * For every destination row j in [yStart, yFinish] the function walks the
 * destination pixels of that row up to and including dstLineEnd. Each output
 * value is a blend of a 4x4 block of source pixels: four horizontal 4-tap
 * sums (c0..c3, weighted by xf0..xf3) are combined with the vertical weights
 * yf0..yf3 and written through STORE.
 *
 * param - transform state handed to the DECLAREVAR_BC()/CLIP() macros.
 *
 * Always returns MLIB_SUCCESS.
 *
 * NOTE(review): j, yStart, yFinish, filter, X, Y, dX, dY, xSrc, ySrc,
 * xRight, lineAddr, srcYStride, dstData, srcPixelPtr and dstPixelPtr are not
 * declared in this function; presumably DECLAREVAR_BC() and CLIP() introduce
 * and initialise them from *param - confirm in mlib_ImageAffine.h.
 */
mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
{
  DECLAREVAR_BC();
  DTYPE *dstLineEnd;

  for (j = yStart; j <= yFinish; j++) {
    FTYPE xf0, xf1, xf2, xf3;
    FTYPE yf0, yf1, yf2, yf3;
    FTYPE dx, dx_2, dx2, dx3_2, dx3_3;
    FTYPE dy, dy_2, dy2, dy3_2, dy3_3;
    FTYPE c0, c1, c2, c3, val0;
    /* 1/65536: turns the masked low bits of X/Y into a fraction. */
    FTYPE scale = 1 / 65536.f;
    FTYPE s0, s1, s2, s3;
    FTYPE s4, s5, s6, s7;

    /* NOTE(review): CLIP is defined elsewhere; presumably it sets up this
     * row's X, Y, xRight and dstPixelPtr and skips empty rows - confirm. */
    CLIP(1);
    /* Last destination pixel written for this row. */
    dstLineEnd = (DTYPE *) dstData + xRight;

    /* Weights for the first pixel of the row; the OPERATOR slot is empty. */
    if (filter == MLIB_BICUBIC) {
      CREATE_COEF_BICUBIC(X, Y,;
        );
    }
    else {
      CREATE_COEF_BICUBIC_2(X, Y,;
        );
    }

    /* Top-left corner of the 4x4 source block: one pixel up and to the left
     * of the integer source position. */
    xSrc = (X >> MLIB_SHIFT) - 1;
    ySrc = (Y >> MLIB_SHIFT) - 1;

    /* Preload source rows 0 and 1 of the first pixel's block. */
    srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
    s0 = srcPixelPtr[0];
    s1 = srcPixelPtr[1];
    s2 = srcPixelPtr[2];
    s3 = srcPixelPtr[3];

    srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
    s4 = srcPixelPtr[0];
    s5 = srcPixelPtr[1];
    s6 = srcPixelPtr[2];
    s7 = srcPixelPtr[3];

    /*
     * Staggered loop: each pass finishes the pixel whose rows 0-1 were
     * loaded on the previous pass (or above), then loads rows 0-1 for the
     * following pixel. It stops one pixel short of the end of the row; the
     * last pixel is completed after the loop.
     */
    if (filter == MLIB_BICUBIC) {
      for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
        /* Advance the source coordinates to the next destination pixel. */
        X += dX;
        Y += dY;

        /* Horizontal sums with the current pixel's x weights: rows 0-1 from
         * the preloaded values, rows 2-3 read directly from the source. */
        c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
        c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
        srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
        c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
              srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
        srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
        c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
              srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);

        /* Compute the next pixel's weights. The val0 assignment runs inside
         * the macro before yf0..yf3 are overwritten, so it still uses the
         * current pixel's y weights. */
        CREATE_COEF_BICUBIC(X, Y, val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3));

        STORE(dstPixelPtr[0], val0);

        /* Preload rows 0-1 of the next pixel's source block. */
        xSrc = (X >> MLIB_SHIFT) - 1;
        ySrc = (Y >> MLIB_SHIFT) - 1;

        srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
        s0 = srcPixelPtr[0];
        s1 = srcPixelPtr[1];
        s2 = srcPixelPtr[2];
        s3 = srcPixelPtr[3];

        srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
        s4 = srcPixelPtr[0];
        s5 = srcPixelPtr[1];
        s6 = srcPixelPtr[2];
        s7 = srcPixelPtr[3];
      }

    }
    else {
      /* Same loop as above with the second bicubic weight set. */
      for (; dstPixelPtr <= (dstLineEnd - 1); dstPixelPtr++) {
        X += dX;
        Y += dY;

        c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
        c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
        srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
        c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
              srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
        srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
        c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
              srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);

        CREATE_COEF_BICUBIC_2(X, Y, val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3));

        STORE(dstPixelPtr[0], val0);

        xSrc = (X >> MLIB_SHIFT) - 1;
        ySrc = (Y >> MLIB_SHIFT) - 1;

        srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + xSrc;
        s0 = srcPixelPtr[0];
        s1 = srcPixelPtr[1];
        s2 = srcPixelPtr[2];
        s3 = srcPixelPtr[3];

        srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
        s4 = srcPixelPtr[0];
        s5 = srcPixelPtr[1];
        s6 = srcPixelPtr[2];
        s7 = srcPixelPtr[3];
      }
    }

    /* Finish the last pixel of the row: its weights and rows 0-1 are
     * already in place, so no further coordinate step or preload is done. */
    c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
    c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
    srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
    c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
          srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);
    srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
    c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[1] * xf1 +
          srcPixelPtr[2] * xf2 + srcPixelPtr[3] * xf3);

    val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
    STORE(dstPixelPtr[0], val0);
  }

  return MLIB_SUCCESS;
}
271 
272 /***************************************************************/
/*
 * Bicubic affine resampling, 2 interleaved channels per pixel.
 *
 * For every destination row j in [yStart, yFinish] the row is processed once
 * per channel k: each pass restarts from the row's starting coordinates
 * (copied into X1/Y1) and steps through the destination with a stride of 2
 * elements. Each output value is a blend of a 4x4 block of source samples of
 * channel k: four horizontal 4-tap sums (c0..c3, weighted by xf0..xf3) are
 * combined with the vertical weights yf0..yf3 and written through STORE.
 *
 * param - transform state handed to the DECLAREVAR_BC()/CLIP() macros.
 *
 * Always returns MLIB_SUCCESS.
 *
 * NOTE(review): j, yStart, yFinish, filter, X, Y, dX, dY, xSrc, ySrc,
 * xRight, lineAddr, srcYStride, dstData, srcPixelPtr and dstPixelPtr are not
 * declared in this function; presumably DECLAREVAR_BC() and CLIP() introduce
 * and initialise them from *param - confirm in mlib_ImageAffine.h.
 */
mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
{
  DECLAREVAR_BC();
  DTYPE *dstLineEnd;

  for (j = yStart; j <= yFinish; j++) {
    FTYPE xf0, xf1, xf2, xf3;
    FTYPE yf0, yf1, yf2, yf3;
    FTYPE dx, dx_2, dx2, dx3_2, dx3_3;
    FTYPE dy, dy_2, dy2, dy3_2, dy3_3;
    FTYPE c0, c1, c2, c3, val0;
    /* 1/65536: turns the masked low bits of X1/Y1 into a fraction. */
    FTYPE scale = 1 / 65536.f;
    FTYPE s0, s1, s2, s3;
    FTYPE s4, s5, s6, s7;
    mlib_s32 k;

    /* NOTE(review): CLIP is defined elsewhere; presumably it sets up this
     * row's X, Y, xRight and dstPixelPtr and skips empty rows - confirm. */
    CLIP(2);
    /* First element of the last destination pixel written for this row. */
    dstLineEnd = (DTYPE *) dstData + 2 * xRight;

    /* One full pass over the row per channel. */
    for (k = 0; k < 2; k++) {
      /* Per-channel copies so every channel starts from the same point. */
      mlib_s32 X1 = X;
      mlib_s32 Y1 = Y;
      DTYPE *dPtr = dstPixelPtr + k;

      /* Weights for the first pixel of the row; the OPERATOR slot is empty. */
      if (filter == MLIB_BICUBIC) {
        CREATE_COEF_BICUBIC(X1, Y1,;
          );
      }
      else {
        CREATE_COEF_BICUBIC_2(X1, Y1,;
          );
      }

      /* Top-left corner of the 4x4 source block: one pixel up and to the
       * left of the integer source position. */
      xSrc = (X1 >> MLIB_SHIFT) - 1;
      ySrc = (Y1 >> MLIB_SHIFT) - 1;

      /* Preload source rows 0 and 1; horizontal neighbours of channel k are
       * 2 elements apart. */
      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
      s0 = srcPixelPtr[0];
      s1 = srcPixelPtr[2];
      s2 = srcPixelPtr[4];
      s3 = srcPixelPtr[6];

      srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
      s4 = srcPixelPtr[0];
      s5 = srcPixelPtr[2];
      s6 = srcPixelPtr[4];
      s7 = srcPixelPtr[6];

      /*
       * Staggered loop: each pass finishes the pixel whose rows 0-1 were
       * loaded on the previous pass (or above), then loads rows 0-1 for the
       * following pixel. It stops one pixel short of the end of the row; the
       * last pixel is completed after the loop.
       */
      if (filter == MLIB_BICUBIC) {
        for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
          /* Advance the source coordinates to the next destination pixel. */
          X1 += dX;
          Y1 += dY;

          /* Horizontal sums with the current pixel's x weights: rows 0-1
           * from the preloaded values, rows 2-3 read directly. */
          c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
          c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
                srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
                srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);

          /* Compute the next pixel's weights. The val0 assignment runs
           * inside the macro before yf0..yf3 are overwritten, so it still
           * uses the current pixel's y weights. */
          CREATE_COEF_BICUBIC(X1, Y1, val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3));

          STORE(dPtr[0], val0);

          /* Preload rows 0-1 of the next pixel's source block. */
          xSrc = (X1 >> MLIB_SHIFT) - 1;
          ySrc = (Y1 >> MLIB_SHIFT) - 1;

          srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
          s0 = srcPixelPtr[0];
          s1 = srcPixelPtr[2];
          s2 = srcPixelPtr[4];
          s3 = srcPixelPtr[6];

          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          s4 = srcPixelPtr[0];
          s5 = srcPixelPtr[2];
          s6 = srcPixelPtr[4];
          s7 = srcPixelPtr[6];
        }

      }
      else {
        /* Same loop as above with the second bicubic weight set. */
        for (; dPtr <= (dstLineEnd - 1); dPtr += 2) {
          X1 += dX;
          Y1 += dY;

          c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
          c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
                srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
                srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);

          CREATE_COEF_BICUBIC_2(X1, Y1, val0 =
                                (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3));

          STORE(dPtr[0], val0);

          xSrc = (X1 >> MLIB_SHIFT) - 1;
          ySrc = (Y1 >> MLIB_SHIFT) - 1;

          srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 2 * xSrc + k;
          s0 = srcPixelPtr[0];
          s1 = srcPixelPtr[2];
          s2 = srcPixelPtr[4];
          s3 = srcPixelPtr[6];

          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          s4 = srcPixelPtr[0];
          s5 = srcPixelPtr[2];
          s6 = srcPixelPtr[4];
          s7 = srcPixelPtr[6];
        }
      }

      /* Finish the last pixel of the row for this channel: its weights and
       * rows 0-1 are already in place. */
      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
      srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
            srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);
      srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[2] * xf1 +
            srcPixelPtr[4] * xf2 + srcPixelPtr[6] * xf3);

      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
      STORE(dPtr[0], val0);
    }
  }

  return MLIB_SUCCESS;
}
408 
409 /***************************************************************/
/*
 * Bicubic affine resampling, 3 interleaved channels per pixel.
 *
 * For every destination row j in [yStart, yFinish] the row is processed once
 * per channel k: each pass restarts from the row's starting coordinates
 * (copied into X1/Y1) and steps through the destination with a stride of 3
 * elements. Each output value is a blend of a 4x4 block of source samples of
 * channel k: four horizontal 4-tap sums (c0..c3, weighted by xf0..xf3) are
 * combined with the vertical weights yf0..yf3 and written through STORE.
 *
 * param - transform state handed to the DECLAREVAR_BC()/CLIP() macros.
 *
 * Always returns MLIB_SUCCESS.
 *
 * NOTE(review): j, yStart, yFinish, filter, X, Y, dX, dY, xSrc, ySrc,
 * xRight, lineAddr, srcYStride, dstData, srcPixelPtr and dstPixelPtr are not
 * declared in this function; presumably DECLAREVAR_BC() and CLIP() introduce
 * and initialise them from *param - confirm in mlib_ImageAffine.h.
 */
mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
{
  DECLAREVAR_BC();
  DTYPE *dstLineEnd;

  for (j = yStart; j <= yFinish; j++) {
    FTYPE xf0, xf1, xf2, xf3;
    FTYPE yf0, yf1, yf2, yf3;
    FTYPE dx, dx_2, dx2, dx3_2, dx3_3;
    FTYPE dy, dy_2, dy2, dy3_2, dy3_3;
    FTYPE c0, c1, c2, c3, val0;
    /* 1/65536: turns the masked low bits of X1/Y1 into a fraction. */
    FTYPE scale = 1 / 65536.f;
    FTYPE s0, s1, s2, s3;
    FTYPE s4, s5, s6, s7;
    mlib_s32 k;

    /* NOTE(review): CLIP is defined elsewhere; presumably it sets up this
     * row's X, Y, xRight and dstPixelPtr and skips empty rows - confirm. */
    CLIP(3);
    /* First element of the last destination pixel written for this row. */
    dstLineEnd = (DTYPE *) dstData + 3 * xRight;

    /* One full pass over the row per channel. */
    for (k = 0; k < 3; k++) {
      /* Per-channel copies so every channel starts from the same point. */
      mlib_s32 X1 = X;
      mlib_s32 Y1 = Y;
      DTYPE *dPtr = dstPixelPtr + k;

      /* Weights for the first pixel of the row; the OPERATOR slot is empty. */
      if (filter == MLIB_BICUBIC) {
        CREATE_COEF_BICUBIC(X1, Y1,;
          );
      }
      else {
        CREATE_COEF_BICUBIC_2(X1, Y1,;
          );
      }

      /* Top-left corner of the 4x4 source block: one pixel up and to the
       * left of the integer source position. */
      xSrc = (X1 >> MLIB_SHIFT) - 1;
      ySrc = (Y1 >> MLIB_SHIFT) - 1;

      /* Preload source rows 0 and 1; horizontal neighbours of channel k are
       * 3 elements apart. */
      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
      s0 = srcPixelPtr[0];
      s1 = srcPixelPtr[3];
      s2 = srcPixelPtr[6];
      s3 = srcPixelPtr[9];

      srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
      s4 = srcPixelPtr[0];
      s5 = srcPixelPtr[3];
      s6 = srcPixelPtr[6];
      s7 = srcPixelPtr[9];

      /*
       * Staggered loop: each pass finishes the pixel whose rows 0-1 were
       * loaded on the previous pass (or above), then loads rows 0-1 for the
       * following pixel. It stops one pixel short of the end of the row; the
       * last pixel is completed after the loop.
       */
      if (filter == MLIB_BICUBIC) {
        for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
          /* Advance the source coordinates to the next destination pixel. */
          X1 += dX;
          Y1 += dY;

          /* Horizontal sums with the current pixel's x weights: rows 0-1
           * from the preloaded values, rows 2-3 read directly. */
          c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
          c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
                srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
                srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);

          /* Compute the next pixel's weights. The val0 assignment runs
           * inside the macro before yf0..yf3 are overwritten, so it still
           * uses the current pixel's y weights. */
          CREATE_COEF_BICUBIC(X1, Y1, val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3));

          STORE(dPtr[0], val0);

          /* Preload rows 0-1 of the next pixel's source block. */
          xSrc = (X1 >> MLIB_SHIFT) - 1;
          ySrc = (Y1 >> MLIB_SHIFT) - 1;

          srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
          s0 = srcPixelPtr[0];
          s1 = srcPixelPtr[3];
          s2 = srcPixelPtr[6];
          s3 = srcPixelPtr[9];

          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          s4 = srcPixelPtr[0];
          s5 = srcPixelPtr[3];
          s6 = srcPixelPtr[6];
          s7 = srcPixelPtr[9];
        }

      }
      else {
        /* Same loop as above with the second bicubic weight set. */
        for (; dPtr <= (dstLineEnd - 1); dPtr += 3) {
          X1 += dX;
          Y1 += dY;

          c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
          c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
                srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
                srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);

          CREATE_COEF_BICUBIC_2(X1, Y1, val0 =
                                (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3));

          STORE(dPtr[0], val0);

          xSrc = (X1 >> MLIB_SHIFT) - 1;
          ySrc = (Y1 >> MLIB_SHIFT) - 1;

          srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 3 * xSrc + k;
          s0 = srcPixelPtr[0];
          s1 = srcPixelPtr[3];
          s2 = srcPixelPtr[6];
          s3 = srcPixelPtr[9];

          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          s4 = srcPixelPtr[0];
          s5 = srcPixelPtr[3];
          s6 = srcPixelPtr[6];
          s7 = srcPixelPtr[9];
        }
      }

      /* Finish the last pixel of the row for this channel: its weights and
       * rows 0-1 are already in place. */
      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
      srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
            srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);
      srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[3] * xf1 +
            srcPixelPtr[6] * xf2 + srcPixelPtr[9] * xf3);

      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
      STORE(dPtr[0], val0);
    }
  }

  return MLIB_SUCCESS;
}
545 
546 /***************************************************************/
/*
 * Bicubic affine resampling, 4 interleaved channels per pixel.
 *
 * For every destination row j in [yStart, yFinish] the row is processed once
 * per channel k: each pass restarts from the row's starting coordinates
 * (copied into X1/Y1) and steps through the destination with a stride of 4
 * elements. Each output value is a blend of a 4x4 block of source samples of
 * channel k: four horizontal 4-tap sums (c0..c3, weighted by xf0..xf3) are
 * combined with the vertical weights yf0..yf3 and written through STORE.
 *
 * param - transform state handed to the DECLAREVAR_BC()/CLIP() macros.
 *
 * Always returns MLIB_SUCCESS.
 *
 * NOTE(review): j, yStart, yFinish, filter, X, Y, dX, dY, xSrc, ySrc,
 * xRight, lineAddr, srcYStride, dstData, srcPixelPtr and dstPixelPtr are not
 * declared in this function; presumably DECLAREVAR_BC() and CLIP() introduce
 * and initialise them from *param - confirm in mlib_ImageAffine.h.
 */
mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
{
  DECLAREVAR_BC();
  DTYPE *dstLineEnd;

  for (j = yStart; j <= yFinish; j++) {
    FTYPE xf0, xf1, xf2, xf3;
    FTYPE yf0, yf1, yf2, yf3;
    FTYPE dx, dx_2, dx2, dx3_2, dx3_3;
    FTYPE dy, dy_2, dy2, dy3_2, dy3_3;
    FTYPE c0, c1, c2, c3, val0;
    /* 1/65536: turns the masked low bits of X1/Y1 into a fraction. */
    FTYPE scale = 1 / 65536.f;
    FTYPE s0, s1, s2, s3;
    FTYPE s4, s5, s6, s7;
    mlib_s32 k;

    /* NOTE(review): CLIP is defined elsewhere; presumably it sets up this
     * row's X, Y, xRight and dstPixelPtr and skips empty rows - confirm. */
    CLIP(4);
    /* First element of the last destination pixel written for this row. */
    dstLineEnd = (DTYPE *) dstData + 4 * xRight;

    /* One full pass over the row per channel. */
    for (k = 0; k < 4; k++) {
      /* Per-channel copies so every channel starts from the same point. */
      mlib_s32 X1 = X;
      mlib_s32 Y1 = Y;
      DTYPE *dPtr = dstPixelPtr + k;

      /* Weights for the first pixel of the row; the OPERATOR slot is empty. */
      if (filter == MLIB_BICUBIC) {
        CREATE_COEF_BICUBIC(X1, Y1,;
          );
      }
      else {
        CREATE_COEF_BICUBIC_2(X1, Y1,;
          );
      }

      /* Top-left corner of the 4x4 source block: one pixel up and to the
       * left of the integer source position. */
      xSrc = (X1 >> MLIB_SHIFT) - 1;
      ySrc = (Y1 >> MLIB_SHIFT) - 1;

      /* Preload source rows 0 and 1; horizontal neighbours of channel k are
       * 4 elements apart. */
      srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
      s0 = srcPixelPtr[0];
      s1 = srcPixelPtr[4];
      s2 = srcPixelPtr[8];
      s3 = srcPixelPtr[12];

      srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
      s4 = srcPixelPtr[0];
      s5 = srcPixelPtr[4];
      s6 = srcPixelPtr[8];
      s7 = srcPixelPtr[12];

      /*
       * Staggered loop: each pass finishes the pixel whose rows 0-1 were
       * loaded on the previous pass (or above), then loads rows 0-1 for the
       * following pixel. It stops one pixel short of the end of the row; the
       * last pixel is completed after the loop.
       */
      if (filter == MLIB_BICUBIC) {
        for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {

          /* Advance the source coordinates to the next destination pixel. */
          X1 += dX;
          Y1 += dY;

          /* Horizontal sums with the current pixel's x weights: rows 0-1
           * from the preloaded values, rows 2-3 read directly. */
          c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
          c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
                srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
                srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);

          /* Compute the next pixel's weights. The val0 assignment runs
           * inside the macro before yf0..yf3 are overwritten, so it still
           * uses the current pixel's y weights. */
          CREATE_COEF_BICUBIC(X1, Y1, val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3));

          STORE(dPtr[0], val0);

          /* Preload rows 0-1 of the next pixel's source block. */
          xSrc = (X1 >> MLIB_SHIFT) - 1;
          ySrc = (Y1 >> MLIB_SHIFT) - 1;

          srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
          s0 = srcPixelPtr[0];
          s1 = srcPixelPtr[4];
          s2 = srcPixelPtr[8];
          s3 = srcPixelPtr[12];

          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          s4 = srcPixelPtr[0];
          s5 = srcPixelPtr[4];
          s6 = srcPixelPtr[8];
          s7 = srcPixelPtr[12];
        }

      }
      else {
        /* Same loop as above with the second bicubic weight set. */
        for (; dPtr <= (dstLineEnd - 1); dPtr += 4) {

          X1 += dX;
          Y1 += dY;

          c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
          c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
                srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
                srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);

          CREATE_COEF_BICUBIC_2(X1, Y1, val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3));

          STORE(dPtr[0], val0);

          xSrc = (X1 >> MLIB_SHIFT) - 1;
          ySrc = (Y1 >> MLIB_SHIFT) - 1;

          srcPixelPtr = ((DTYPE **) lineAddr)[ySrc] + 4 * xSrc + k;
          s0 = srcPixelPtr[0];
          s1 = srcPixelPtr[4];
          s2 = srcPixelPtr[8];
          s3 = srcPixelPtr[12];

          srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
          s4 = srcPixelPtr[0];
          s5 = srcPixelPtr[4];
          s6 = srcPixelPtr[8];
          s7 = srcPixelPtr[12];
        }
      }

      /* Finish the last pixel of the row for this channel: its weights and
       * rows 0-1 are already in place. */
      c0 = (s0 * xf0 + s1 * xf1 + s2 * xf2 + s3 * xf3);
      c1 = (s4 * xf0 + s5 * xf1 + s6 * xf2 + s7 * xf3);
      srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
      c2 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
            srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);
      srcPixelPtr = (DTYPE *) ((mlib_u8 *) srcPixelPtr + srcYStride);
      c3 = (srcPixelPtr[0] * xf0 + srcPixelPtr[4] * xf1 +
            srcPixelPtr[8] * xf2 + srcPixelPtr[12] * xf3);

      val0 = (c0 * yf0 + c1 * yf1 + c2 * yf2 + c3 * yf3);
      STORE(dPtr[0], val0);
    }
  }

  return MLIB_SUCCESS;
}
683 
684 /***************************************************************/
685