1 /*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28 /*
29 * The functions step along the lines from xLeft to xRight and apply
30 * the bicubic filtering.
31 *
32 */
33
34 #include "vis_proto.h"
35 #include "mlib_ImageAffine.h"
36 #include "mlib_v_ImageFilters.h"
37
38 /***************************************************************/
/* Element type used in this file when declaring/casting destination pointers (see dstLineEnd in the 2-channel function). */
39 #define DTYPE mlib_s16
40
/* NOTE(review): FILTER_BITS is not referenced directly in the visible code; presumably the FILTER_SHIFT / FILTER_MASK values used by the macros below are derived from it in an included header -- confirm. */
41 #define FILTER_BITS 9
42
43 /***************************************************************/
/* Shorthand: every macro below reads and advances the source pointer through the name sPtr. */
44 #define sPtr srcPixelPtr
45
46 /***************************************************************/
/*
 * Point sPtr at the first source sample needed for the current (X, Y):
 * the integer parts of X and Y (bits above MLIB_SHIFT), each moved back by
 * one, select the row in lineAddr[] and the 16-bit element within it.
 * Sets xSrc, ySrc and sPtr.
 */
#define NEXT_PIXEL_1BC_S16()                                    \
  ySrc = (Y >> MLIB_SHIFT) - 1;                                 \
  xSrc = (X >> MLIB_SHIFT) - 1;                                 \
  sPtr = &((mlib_s16 *)lineAddr[ySrc])[xSrc]
51
52 /***************************************************************/
/*
 * Load everything needed to filter one 1-channel pixel, then step X and Y.
 *
 *   - xFilter and yFilter0..yFilter3 are read from the two coefficient
 *     tables at byte offsets taken from the FILTER_SHIFT/FILTER_MASK bits
 *     of X and Y (the y table holds four 8-byte entries per position, hence
 *     the "* 4").
 *   - row0..row3 each receive one 8-byte value assembled with
 *     vis_alignaddr/vis_faligndata from four successive source rows,
 *     starting at sPtr and stepping by srcYStride elements.
 *
 * On exit sPtr is left on the fourth row, data0/data1 hold the last two
 * raw reads, and X/Y have been advanced by dX/dY.
 */
#define LOAD_BC_S16_1CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4)      \
  /* coefficient lookup (uses X and Y before they are advanced) */        \
  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK;                         \
  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK;                         \
  xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx));    \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4));   \
  yFilter0 = yPtr[0];                                                     \
  yFilter1 = yPtr[1];                                                     \
  yFilter2 = yPtr[2];                                                     \
  yFilter3 = yPtr[3];                                                     \
  /* source row 0 */                                                      \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  row0 = vis_faligndata(data0, data1);                                    \
  /* source row 1 */                                                      \
  sPtr += srcYStride;                                                     \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  row1 = vis_faligndata(data0, data1);                                    \
  /* source row 2 */                                                      \
  sPtr += srcYStride;                                                     \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  row2 = vis_faligndata(data0, data1);                                    \
  /* source row 3 */                                                      \
  sPtr += srcYStride;                                                     \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  row3 = vis_faligndata(data0, data1);                                    \
  /* move on to the next destination pixel */                             \
  X += dX;                                                                \
  Y += dY
83
84 /***************************************************************/
/*
 * Produce one filtered 1-channel pixel in res from the state left by
 * LOAD_BC_S16_1CH_1PIXEL (row0..row3, yFilter0..yFilter3, xFilter).
 *
 * Vertical pass: each row is XORed with mask8000 and multiplied by its y
 * coefficients; the vis_fmul8sux16 and vis_fmul8ulx16 results of each
 * product are added, and the four row terms are accumulated in sum.
 * Horizontal pass: sum is multiplied by xFilter the same way, the 16-bit
 * lanes are folded together (vis_fpadd16s, then vis_fmuld8sux16 with
 * f_x01000100, then vis_fpadd32s), and the total is packed with
 * vis_fpackfix_pair and XORed with mask8000 again.
 *
 * Clobbers u0..u3, v0..v3, sum, d00, d10, d0, d1 and p0.  Does not touch
 * X, Y or sPtr.
 */
85 #define RESULT_1BC_S16_1PIXEL() \
86 u0 = vis_fmul8sux16(vis_fxor(row0, mask8000), yFilter0); /* row 0 * yFilter0, first partial product */ \
87 u1 = vis_fmul8ulx16(vis_fxor(row0, mask8000), yFilter0); /* row 0 * yFilter0, second partial product */ \
88 u2 = vis_fmul8sux16(vis_fxor(row1, mask8000), yFilter1); \
89 v0 = vis_fpadd16(u0, u1); /* v0 = row 0 term */ \
90 u3 = vis_fmul8ulx16(vis_fxor(row1, mask8000), yFilter1); \
91 u0 = vis_fmul8sux16(vis_fxor(row2, mask8000), yFilter2); \
92 v1 = vis_fpadd16(u2, u3); /* v1 = row 1 term */ \
93 u1 = vis_fmul8ulx16(vis_fxor(row2, mask8000), yFilter2); \
94 sum = vis_fpadd16(v0, v1); \
95 u2 = vis_fmul8sux16(vis_fxor(row3, mask8000), yFilter3); \
96 v2 = vis_fpadd16(u0, u1); /* v2 = row 2 term */ \
97 u3 = vis_fmul8ulx16(vis_fxor(row3, mask8000), yFilter3); \
98 sum = vis_fpadd16(sum, v2); \
99 v3 = vis_fpadd16(u2, u3); /* v3 = row 3 term */ \
100 sum = vis_fpadd16(sum, v3); /* sum = vertically filtered samples */ \
101 d00 = vis_fmul8sux16(sum, xFilter); \
102 d10 = vis_fmul8ulx16(sum, xFilter); \
103 d0 = vis_fpadd16(d00, d10); /* per-lane sum * xFilter */ \
104 p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); /* fold upper and lower halves together */ \
105 d0 = vis_fmuld8sux16(f_x01000100, p0); /* widen the two remaining lanes to 32 bits */ \
106 d1 = vis_write_lo(d1, vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0))); /* final sum goes into the low half of d1 */ \
107 res = vis_fxor(vis_fpackfix_pair(d1, d1), mask8000)
108
109 /***************************************************************/
/*
 * One step of the hand-interleaved 1-channel pipeline.
 *
 * On entry row0..row3, yFilter0..yFilter3 and xFilter describe an already
 * loaded pixel, sPtr addresses the next pixel's source data, and X/Y are
 * that next pixel's coordinates.  The macro interleaves three things:
 *   - filtering the loaded pixel (same arithmetic as RESULT_1BC_S16_1PIXEL
 *     up to the per-lane x product), leaving the product in d0<ind>
 *     (d00..d03) for FADD_1BC_S16 to fold;
 *   - loading row0..row3 and the filter coefficients of the next pixel;
 *   - advancing X/Y by dX/dY and setting xSrc, ySrc and sPtr for the pixel
 *     after that.
 *
 * Each rowN / yFilterN / xFilter is overwritten only after its last use for
 * the current pixel, so the statement order must be preserved.
 * d0 and d1 are used as scratch.
 */
110 #define BC_S16_1CH(ind, mlib_filters_s16, mlib_filters_s16_4) \
111 u0 = vis_fmul8sux16(vis_fxor(row0, mask8000), yFilter0); \
112 u1 = vis_fmul8ulx16(vis_fxor(row0, mask8000), yFilter0); /* last use of the old row0 */ \
113 dpSrc = vis_alignaddr(sPtr, 0); /* start fetching the next pixel's rows */ \
114 u2 = vis_fmul8sux16(vis_fxor(row1, mask8000), yFilter1); \
115 v0 = vis_fpadd16(u0, u1); \
116 data0 = dpSrc[0]; \
117 filterposy = (Y >> FILTER_SHIFT); /* taken before Y is advanced; masked further down */ \
118 u3 = vis_fmul8ulx16(vis_fxor(row1, mask8000), yFilter1); /* last use of the old row1 */ \
119 data1 = dpSrc[1]; \
120 row0 = vis_faligndata(data0, data1); \
121 filterposx = (X >> FILTER_SHIFT); /* taken before X is advanced; masked further down */ \
122 sPtr += srcYStride; \
123 dpSrc = vis_alignaddr(sPtr, 0); \
124 u0 = vis_fmul8sux16(vis_fxor(row2, mask8000), yFilter2); \
125 v1 = vis_fpadd16(u2, u3); \
126 data0 = dpSrc[0]; \
127 u1 = vis_fmul8ulx16(vis_fxor(row2, mask8000), yFilter2); /* last use of the old row2 */ \
128 sum = vis_fpadd16(v0, v1); \
129 X += dX; \
130 data1 = dpSrc[1]; \
131 row1 = vis_faligndata(data0, data1); \
132 sPtr += srcYStride; \
133 dpSrc = vis_alignaddr(sPtr, 0); \
134 u2 = vis_fmul8sux16(vis_fxor(row3, mask8000), yFilter3); \
135 v2 = vis_fpadd16(u0, u1); \
136 Y += dY; \
137 xSrc = (X >> MLIB_SHIFT)-1; /* from the advanced X: column of the pixel after the one being loaded */ \
138 data0 = dpSrc[0]; \
139 u3 = vis_fmul8ulx16(vis_fxor(row3, mask8000), yFilter3); /* last use of the old row3 */ \
140 sum = vis_fpadd16(sum, v2); \
141 ySrc = (Y >> MLIB_SHIFT)-1; /* from the advanced Y */ \
142 data1 = dpSrc[1]; \
143 filterposy &= FILTER_MASK; \
144 row2 = vis_faligndata(data0, data1); \
145 sPtr += srcYStride; \
146 filterposx &= FILTER_MASK; \
147 dpSrc = vis_alignaddr(sPtr, 0); \
148 data0 = dpSrc[0]; \
149 v3 = vis_fpadd16(u2, u3); \
150 data1 = dpSrc[1]; \
151 row3 = vis_faligndata(data0, data1); \
152 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
153 yFilter0 = yPtr[0]; \
154 sum = vis_fpadd16(sum, v3); /* vertically filtered samples of the current pixel */ \
155 yFilter1 = yPtr[1]; \
156 d0 = vis_fmul8sux16(sum, xFilter); \
157 yFilter2 = yPtr[2]; \
158 d1 = vis_fmul8ulx16(sum, xFilter); /* last use of the old xFilter */ \
159 yFilter3 = yPtr[3]; \
160 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \
161 d0##ind = vis_fpadd16(d0, d1); /* per-lane x product, folded later by FADD_1BC_S16 */ \
162 sPtr = (mlib_s16 *)lineAddr[ySrc] + xSrc
163
164 /***************************************************************/
/*
 * Fold the four per-lane products d00..d03 (one per pixel, produced by
 * BC_S16_1CH(0..3)) into four results and pack them into res.
 *
 * For each pixel the upper and lower halves are added (vis_fpadd16s) and
 * the remaining two lanes are widened to 32 bits (vis_fmuld8sux16 with
 * f_x01000100).  The two 32-bit lanes of each pixel are then summed and
 * paired up, packed with vis_fpackfix_pair and XORed with mask8000.
 *
 * Clobbers p0..p3 and d0..d3.
 */
#define FADD_1BC_S16()                                                    \
  /* pixel 0 */                                                           \
  p0 = vis_fpadd16s(vis_read_hi(d00), vis_read_lo(d00));                  \
  d0 = vis_fmuld8sux16(f_x01000100, p0);                                  \
  /* pixel 1 */                                                           \
  p1 = vis_fpadd16s(vis_read_hi(d01), vis_read_lo(d01));                  \
  d1 = vis_fmuld8sux16(f_x01000100, p1);                                  \
  /* pixel 2 */                                                           \
  p2 = vis_fpadd16s(vis_read_hi(d02), vis_read_lo(d02));                  \
  d2 = vis_fmuld8sux16(f_x01000100, p2);                                  \
  /* pixel 3 */                                                           \
  p3 = vis_fpadd16s(vis_read_hi(d03), vis_read_lo(d03));                  \
  d3 = vis_fmuld8sux16(f_x01000100, p3);                                  \
  /* d0 must be rebuilt before d1: the first pair still reads old d1 */   \
  d0 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0)),      \
                     vis_fpadd32s(vis_read_hi(d1), vis_read_lo(d1)));     \
  d1 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d2), vis_read_lo(d2)),      \
                     vis_fpadd32s(vis_read_hi(d3), vis_read_lo(d3)));     \
  res = vis_fxor(vis_fpackfix_pair(d0, d1), mask8000)
179
180 /***************************************************************/
mlib_ImageAffine_u16_1ch_bc(mlib_affine_param * param)181 mlib_status mlib_ImageAffine_u16_1ch_bc (mlib_affine_param *param)
182 {
183 DECLAREVAR_BC();
184 mlib_s32 filterposx, filterposy;
185 mlib_d64 data0, data1;
186 mlib_d64 sum;
187 mlib_d64 row0, row1, row2, row3;
188 mlib_f32 p0, p1, p2, p3;
189 mlib_d64 xFilter, yFilter0, yFilter1, yFilter2, yFilter3;
190 mlib_d64 v0, v1, v2, v3;
191 mlib_d64 u0, u1, u2, u3;
192 mlib_d64 d0, d1, d2, d3;
193 mlib_d64 d00, d10, d01, d02, d03;
194 mlib_d64 *yPtr;
195 mlib_d64 *dpSrc;
196 mlib_s32 align, cols, i;
197 mlib_d64 res;
198 mlib_f32 f_x01000100 = vis_to_float(0x01000100);
199 mlib_d64 mask8000 = vis_to_double_dup(0x80008000);
200 const mlib_s16 *mlib_filters_table ;
201 const mlib_s16 *mlib_filters_table_4;
202
203 if (filter == MLIB_BICUBIC) {
204 mlib_filters_table = mlib_filters_s16_bc;
205 mlib_filters_table_4 = mlib_filters_s16_bc_4;
206 } else {
207 mlib_filters_table = mlib_filters_s16_bc2;
208 mlib_filters_table_4 = mlib_filters_s16_bc2_4;
209 }
210
211 srcYStride >>= 1;
212
213 for (j = yStart; j <= yFinish; j++) {
214
215 vis_write_gsr(10 << 3);
216
217 CLIP(1);
218
219 cols = xRight - xLeft + 1;
220 align = (8 - ((mlib_addr)dstPixelPtr) & 7) & 7;
221 align >>= 1;
222 align = (cols < align)? cols : align;
223
224 for (i = 0; i < align; i++) {
225 NEXT_PIXEL_1BC_S16();
226 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
227 RESULT_1BC_S16_1PIXEL();
228 vis_st_u16(res, dstPixelPtr++);
229 }
230
231 if (i <= cols - 10) {
232
233 NEXT_PIXEL_1BC_S16();
234 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
235
236 NEXT_PIXEL_1BC_S16();
237
238 BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4);
239 BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4);
240 BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4);
241 BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4);
242
243 FADD_1BC_S16();
244
245 BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4);
246 BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4);
247 BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4);
248 BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4);
249
250 #pragma pipeloop(0)
251 for (; i <= cols - 14; i += 4) {
252 *(mlib_d64*)dstPixelPtr = res;
253 FADD_1BC_S16();
254 BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4);
255 BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4);
256 BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4);
257 BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4);
258 dstPixelPtr += 4;
259 }
260
261 *(mlib_d64*)dstPixelPtr = res;
262 dstPixelPtr += 4;
263 FADD_1BC_S16();
264 *(mlib_d64*)dstPixelPtr = res;
265 dstPixelPtr += 4;
266
267 RESULT_1BC_S16_1PIXEL();
268 vis_st_u16(res, dstPixelPtr++);
269
270 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
271 RESULT_1BC_S16_1PIXEL();
272 vis_st_u16(res, dstPixelPtr++);
273 i += 10;
274 }
275
276 for (; i < cols; i++) {
277 NEXT_PIXEL_1BC_S16();
278 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
279 RESULT_1BC_S16_1PIXEL();
280 vis_st_u16(res, dstPixelPtr++);
281 }
282 }
283
284 return MLIB_SUCCESS;
285 }
286
287 /***************************************************************/
/*
 * 2-channel variant of the source-pointer setup: the integer parts of X
 * and Y, each moved back by one, select the row in lineAddr[] and the
 * pixel within it; the pixel index is doubled because every pixel occupies
 * two 16-bit elements.  Sets xSrc, ySrc and sPtr.
 */
#define NEXT_PIXEL_2BC_S16()                                    \
  ySrc = (Y >> MLIB_SHIFT) - 1;                                 \
  xSrc = (X >> MLIB_SHIFT) - 1;                                 \
  sPtr = &((mlib_s16 *)lineAddr[ySrc])[xSrc << 1]
292
293 /***************************************************************/
/*
 * Load everything needed to filter one 2-channel pixel, then step X and Y.
 *
 *   - xFilter and yFilter0..yFilter3 are read from the two coefficient
 *     tables at byte offsets taken from the FILTER_SHIFT/FILTER_MASK bits
 *     of X and Y.
 *   - For each of four successive source rows (starting at sPtr, stepping
 *     by srcYStride elements) two 8-byte values rowN0/rowN1 are assembled
 *     from three raw reads with vis_alignaddr/vis_faligndata.
 *
 * On exit sPtr is left on the fourth row, data0..data2 hold the last raw
 * reads, and X/Y have been advanced by dX/dY.
 */
#define LOAD_BC_S16_2CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4)      \
  /* coefficient lookup (uses X and Y before they are advanced) */        \
  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK;                         \
  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK;                         \
  xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx));    \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4));   \
  yFilter0 = yPtr[0];                                                     \
  yFilter1 = yPtr[1];                                                     \
  yFilter2 = yPtr[2];                                                     \
  yFilter3 = yPtr[3];                                                     \
  /* source row 0 */                                                      \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  data2 = dpSrc[2];                                                       \
  row00 = vis_faligndata(data0, data1);                                   \
  row01 = vis_faligndata(data1, data2);                                   \
  /* source row 1 */                                                      \
  sPtr += srcYStride;                                                     \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  data2 = dpSrc[2];                                                       \
  row10 = vis_faligndata(data0, data1);                                   \
  row11 = vis_faligndata(data1, data2);                                   \
  /* source row 2 */                                                      \
  sPtr += srcYStride;                                                     \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  data2 = dpSrc[2];                                                       \
  row20 = vis_faligndata(data0, data1);                                   \
  row21 = vis_faligndata(data1, data2);                                   \
  /* source row 3 */                                                      \
  sPtr += srcYStride;                                                     \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  data2 = dpSrc[2];                                                       \
  row30 = vis_faligndata(data0, data1);                                   \
  row31 = vis_faligndata(data1, data2);                                   \
  /* move on to the next destination pixel */                             \
  X += dX;                                                                \
  Y += dY
332
333 /***************************************************************/
/*
 * Produce one filtered 2-channel pixel in res from the state left by
 * LOAD_BC_S16_2CH_1PIXEL (rowN0/rowN1, yFilter0..yFilter3, xFilter).
 *
 * Interleaved with the arithmetic, xFilter is rearranged through three
 * vis_fpmerge steps (dr, dr1) into xFilter0 and xFilter1 -- presumably so
 * that every x coefficient lines up with both channels of its pixel;
 * confirm against the filter table layout.
 *
 * Vertical pass: each rowNk is XORed with mask8000 and multiplied by
 * yFilterN (vis_fmul8sux16 + vis_fmul8ulx16, added); the row terms are
 * accumulated in sum0 (from the rowN0 values) and sum1 (from rowN1).
 * Horizontal pass: sum0*xFilter0 and sum1*xFilter1 are added, the upper and
 * lower halves are folded with vis_fpadd16s, widened with vis_fmuld8sux16,
 * packed with vis_fpackfix_pair and XORed with mask8000 again.
 *
 * Clobbers u00..u21, v00..v31, sum0, sum1, dr, dr1, xFilter0, xFilter1,
 * d00, d10, d20, d30, d0, d1 and p0.  Does not touch X, Y or sPtr.
 */
334 #define RESULT_2BC_S16_1PIXEL() \
335 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \
336 dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); /* xFilter rearrangement, step 1 */ \
337 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \
338 dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); /* step 2 */ \
339 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \
340 dr1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr)); /* step 3: low half duplicated */ \
341 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \
342 dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); /* step 3: high half duplicated */ \
343 u20 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \
344 v00 = vis_fpadd16(u00, u01); \
345 u21 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \
346 v01 = vis_fpadd16(u10, u11); \
347 u00 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \
348 xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); /* x coefficients applied to sum0 */ \
349 u01 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \
350 u10 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \
351 u11 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \
352 v10 = vis_fpadd16(u20, u21); \
353 sum0 = vis_fpadd16(v00, v10); \
354 u20 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \
355 v11 = vis_fpadd16(u00, u01); \
356 u21 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \
357 xFilter1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr1)); /* x coefficients applied to sum1 */ \
358 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \
359 v20 = vis_fpadd16(u10, u11); \
360 sum1 = vis_fpadd16(v01, v11); \
361 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \
362 sum0 = vis_fpadd16(sum0, v20); \
363 v21 = vis_fpadd16(u20, u21); \
364 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \
365 v30 = vis_fpadd16(u00, u01); \
366 sum1 = vis_fpadd16(sum1, v21); \
367 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \
368 sum0 = vis_fpadd16(sum0, v30); /* vertical result from the rowN0 values */ \
369 v31 = vis_fpadd16(u10, u11); \
370 sum1 = vis_fpadd16(sum1, v31); /* vertical result from the rowN1 values */ \
371 d00 = vis_fmul8sux16(sum0, xFilter0); \
372 d10 = vis_fmul8ulx16(sum0, xFilter0); \
373 d20 = vis_fmul8sux16(sum1, xFilter1); \
374 d30 = vis_fmul8ulx16(sum1, xFilter1); \
375 d0 = vis_fpadd16(d00, d10); \
376 d1 = vis_fpadd16(d20, d30); \
377 d0 = vis_fpadd16(d0, d1); \
378 p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); /* fold upper and lower halves together */ \
379 d0 = vis_fmuld8sux16(f_x01000100, p0); /* widen the two remaining lanes to 32 bits */ \
380 res = vis_fxor(vis_fpackfix_pair(d0, d0), mask8000)
381
382 /***************************************************************/
/*
 * One step of the hand-interleaved 2-channel pipeline.
 *
 * On entry rowN0/rowN1, yFilter0..yFilter3 and xFilter describe an already
 * loaded pixel, sPtr addresses the next pixel's source data, and X/Y are
 * that next pixel's coordinates.  The macro interleaves:
 *   - filtering the loaded pixel (same arithmetic as RESULT_2BC_S16_1PIXEL
 *     up to the two per-lane x products), leaving them in d0<ind> and
 *     d1<ind> (d00/d10 or d01/d11) for FADD_2BC_S16 to combine;
 *   - loading rowN0/rowN1 and the filter coefficients of the next pixel;
 *   - advancing X/Y by dX/dY and setting xSrc, ySrc and sPtr for the pixel
 *     after that.
 *
 * Each row / filter variable is overwritten only after its last use for
 * the current pixel, so the statement order must be preserved.
 * d0..d3, dr, dr1, xFilter0 and xFilter1 are used as scratch.
 */
383 #define BC_S16_2CH(ind, mlib_filters_s16, mlib_filters_s16_4) \
384 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \
385 dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); /* xFilter rearrangement (see xFilter0/xFilter1 below) */ \
386 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \
387 dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); \
388 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \
389 dr1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr)); \
390 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); /* last use of the old row00/row01 */ \
391 dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); \
392 dpSrc = vis_alignaddr(sPtr, 0); /* start fetching the next pixel's rows */ \
393 u20 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \
394 v00 = vis_fpadd16(u00, u01); \
395 u21 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \
396 data0 = dpSrc[0]; \
397 filterposy = (Y >> FILTER_SHIFT); /* taken before Y is advanced; masked further down */ \
398 v01 = vis_fpadd16(u10, u11); \
399 data1 = dpSrc[1]; \
400 u00 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \
401 xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); \
402 data2 = dpSrc[2]; \
403 u01 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \
404 row00 = vis_faligndata(data0, data1); \
405 u10 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \
406 row01 = vis_faligndata(data1, data2); \
407 filterposx = (X >> FILTER_SHIFT); /* taken before X is advanced; masked further down */ \
408 sPtr += srcYStride; \
409 dpSrc = vis_alignaddr(sPtr, 0); \
410 u11 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \
411 v10 = vis_fpadd16(u20, u21); \
412 data0 = dpSrc[0]; \
413 sum0 = vis_fpadd16(v00, v10); \
414 X += dX; \
415 data1 = dpSrc[1]; \
416 u20 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \
417 v11 = vis_fpadd16(u00, u01); \
418 data2 = dpSrc[2]; \
419 row10 = vis_faligndata(data0, data1); \
420 u21 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \
421 row11 = vis_faligndata(data1, data2); \
422 sPtr += srcYStride; \
423 xFilter1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr1)); \
424 dpSrc = vis_alignaddr(sPtr, 0); \
425 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \
426 v20 = vis_fpadd16(u10, u11); \
427 Y += dY; \
428 xSrc = (X >> MLIB_SHIFT)-1; /* from the advanced X: column of the pixel after the one being loaded */ \
429 sum1 = vis_fpadd16(v01, v11); \
430 data0 = dpSrc[0]; \
431 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \
432 sum0 = vis_fpadd16(sum0, v20); \
433 ySrc = (Y >> MLIB_SHIFT)-1; /* from the advanced Y */ \
434 data1 = dpSrc[1]; \
435 v21 = vis_fpadd16(u20, u21); \
436 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \
437 data2 = dpSrc[2]; \
438 v30 = vis_fpadd16(u00, u01); \
439 filterposy &= FILTER_MASK; \
440 row20 = vis_faligndata(data0, data1); \
441 sum1 = vis_fpadd16(sum1, v21); \
442 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); /* last use of the old row30/row31 */ \
443 row21 = vis_faligndata(data1, data2); \
444 sPtr += srcYStride; \
445 filterposx &= FILTER_MASK; \
446 v31 = vis_fpadd16(u10, u11); \
447 dpSrc = vis_alignaddr(sPtr, 0); \
448 data0 = dpSrc[0]; \
449 sum0 = vis_fpadd16(sum0, v30); \
450 data1 = dpSrc[1]; \
451 sum1 = vis_fpadd16(sum1, v31); \
452 data2 = dpSrc[2]; \
453 row30 = vis_faligndata(data0, data1); \
454 d0 = vis_fmul8sux16(sum0, xFilter0); \
455 row31 = vis_faligndata(data1, data2); \
456 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
457 d1 = vis_fmul8ulx16(sum0, xFilter0); \
458 yFilter0 = yPtr[0]; \
459 d2 = vis_fmul8sux16(sum1, xFilter1); \
460 yFilter1 = yPtr[1]; \
461 d3 = vis_fmul8ulx16(sum1, xFilter1); \
462 d0##ind = vis_fpadd16(d0, d1); /* sum0 * xFilter0, combined later by FADD_2BC_S16 */ \
463 yFilter2 = yPtr[2]; \
464 yFilter3 = yPtr[3]; \
465 d1##ind = vis_fpadd16(d2, d3); /* sum1 * xFilter1, combined later by FADD_2BC_S16 */ \
466 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); /* coefficients of the newly loaded pixel */ \
467 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 1)
468
469 /***************************************************************/
/*
 * Combine the products left by BC_S16_2CH(0) (d00, d10) and BC_S16_2CH(1)
 * (d01, d11) into two finished 2-channel pixels in res.
 *
 * Per pixel: the two products are added, the upper and lower halves are
 * folded with vis_fpadd16s and the result is widened to 32 bits with
 * vis_fmuld8sux16.  Both pixels are then packed with vis_fpackfix_pair
 * and XORed with mask8000.
 *
 * Clobbers d0, d1, d2, p0 and p1.
 */
#define FADD_2BC_S16()                                          \
  /* first pixel */                                             \
  d0 = vis_fpadd16(d00, d10);                                   \
  p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0));          \
  d0 = vis_fmuld8sux16(f_x01000100, p0);                        \
  /* second pixel */                                            \
  d2 = vis_fpadd16(d01, d11);                                   \
  p1 = vis_fpadd16s(vis_read_hi(d2), vis_read_lo(d2));          \
  d1 = vis_fmuld8sux16(f_x01000100, p1);                        \
  res = vis_fxor(vis_fpackfix_pair(d0, d1), mask8000)
478
479 /***************************************************************/
mlib_ImageAffine_u16_2ch_bc(mlib_affine_param * param)480 mlib_status mlib_ImageAffine_u16_2ch_bc (mlib_affine_param *param)
481 {
482 DECLAREVAR_BC();
483 DTYPE *dstLineEnd;
484 mlib_s32 filterposx, filterposy;
485 mlib_d64 data0, data1, data2;
486 mlib_d64 sum0, sum1;
487 mlib_d64 row00, row10, row20, row30;
488 mlib_d64 row01, row11, row21, row31;
489 mlib_f32 p0, p1;
490 mlib_d64 xFilter, xFilter0, xFilter1;
491 mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
492 mlib_d64 v00, v01, v10, v11, v20, v21, v30, v31;
493 mlib_d64 u00, u01, u10, u11, u20, u21;
494 mlib_d64 d0, d1, d2, d3;
495 mlib_d64 d00, d10, d20, d30, d01, d11;
496 mlib_d64 *yPtr;
497 mlib_d64 *dp, *dpSrc;
498 mlib_s32 cols, i, mask, emask;
499 mlib_d64 res, res1;
500 mlib_d64 dr, dr1;
501 mlib_f32 f_x01000100 = vis_to_float(0x01000100);
502 mlib_d64 mask8000 = vis_to_double_dup(0x80008000);
503 const mlib_s16 *mlib_filters_table ;
504 const mlib_s16 *mlib_filters_table_4;
505
506 if (filter == MLIB_BICUBIC) {
507 mlib_filters_table = mlib_filters_s16_bc;
508 mlib_filters_table_4 = mlib_filters_s16_bc_4;
509 } else {
510 mlib_filters_table = mlib_filters_s16_bc2;
511 mlib_filters_table_4 = mlib_filters_s16_bc2_4;
512 }
513
514 srcYStride >>= 1;
515
516 for (j = yStart; j <= yFinish; j++) {
517
518 vis_write_gsr(10 << 3);
519
520 CLIP(2);
521 dstLineEnd = (DTYPE*)dstData + 2 * xRight;
522
523 cols = xRight - xLeft + 1;
524 dp = vis_alignaddr(dstPixelPtr, 0);
525 dstLineEnd += 1;
526 mask = vis_edge16(dstPixelPtr, dstLineEnd);
527 i = 0;
528
529 if (i <= cols - 6) {
530
531 NEXT_PIXEL_2BC_S16();
532 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
533
534 NEXT_PIXEL_2BC_S16();
535
536 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
537 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);
538
539 FADD_2BC_S16();
540
541 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
542 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);
543
544 #pragma pipeloop(0)
545 for (; i <= cols-8; i += 2) {
546 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
547 res = vis_faligndata(res, res);
548 vis_pst_16(res, dp++, mask);
549 vis_pst_16(res, dp, ~mask);
550 FADD_2BC_S16();
551 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
552 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);
553 }
554
555 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
556 res = vis_faligndata(res, res);
557 vis_pst_16(res, dp++, mask);
558 vis_pst_16(res, dp, ~mask);
559
560 FADD_2BC_S16();
561 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
562 res = vis_faligndata(res, res);
563 vis_pst_16(res, dp++, mask);
564 vis_pst_16(res, dp, ~mask);
565
566 RESULT_2BC_S16_1PIXEL();
567 res1 = res;
568
569 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
570 RESULT_2BC_S16_1PIXEL();
571 res = vis_write_hi(res, vis_read_hi(res1));
572 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
573 res = vis_faligndata(res, res);
574 vis_pst_16(res, dp++, mask);
575 vis_pst_16(res, dp, ~mask);
576
577 i += 6;
578 }
579
580 if (i <= cols - 4) {
581 NEXT_PIXEL_2BC_S16();
582 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
583
584 NEXT_PIXEL_2BC_S16();
585
586 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
587 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);
588
589 FADD_2BC_S16();
590 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
591 res = vis_faligndata(res, res);
592 vis_pst_16(res, dp++, mask);
593 vis_pst_16(res, dp, ~mask);
594
595 RESULT_2BC_S16_1PIXEL();
596 res1 = res;
597
598 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
599 RESULT_2BC_S16_1PIXEL();
600 res = vis_write_hi(res, vis_read_hi(res1));
601 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
602 res = vis_faligndata(res, res);
603 vis_pst_16(res, dp++, mask);
604 vis_pst_16(res, dp, ~mask);
605
606 i += 4;
607 }
608
609 if (i <= cols - 2) {
610 NEXT_PIXEL_2BC_S16();
611 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
612 RESULT_2BC_S16_1PIXEL();
613 res1 = res;
614
615 NEXT_PIXEL_2BC_S16();
616 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
617 RESULT_2BC_S16_1PIXEL();
618 res = vis_write_hi(res, vis_read_hi(res1));
619 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
620 res = vis_faligndata(res, res);
621 vis_pst_16(res, dp++, mask);
622 vis_pst_16(res, dp, ~mask);
623
624 i += 2;
625 }
626
627 if (i < cols) {
628 NEXT_PIXEL_2BC_S16();
629 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
630 RESULT_2BC_S16_1PIXEL();
631 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
632 res = vis_faligndata(res, res);
633 emask = vis_edge16(dp, dstLineEnd);
634 vis_pst_16(res, dp++, mask & emask);
635
636 if ((mlib_s16*)dp <= dstLineEnd) {
637 mask = vis_edge16(dp, dstLineEnd);
638 vis_pst_16(res, dp, mask);
639 }
640 }
641 }
642
643 return MLIB_SUCCESS;
644 }
645
646 /***************************************************************/
/*
 * 3-channel variant of the source-pointer setup: the integer parts of X
 * and Y, each moved back by one, select the row in lineAddr[] and the
 * pixel within it; the pixel index is tripled because every pixel occupies
 * three 16-bit elements.  Sets xSrc, ySrc and sPtr.
 */
#define NEXT_PIXEL_3BC_S16()                                    \
  ySrc = (Y >> MLIB_SHIFT) - 1;                                 \
  xSrc = (X >> MLIB_SHIFT) - 1;                                 \
  sPtr = &((mlib_s16 *)lineAddr[ySrc])[xSrc*3]
651
652 /***************************************************************/
/*
 * Load everything needed to filter one 3-channel pixel, then step X and Y.
 *
 *   - yFilter0..yFilter3 come from the 4-entry table at byte offset
 *     filterposy*4; xFilter0..xFilter2 come from the 3-entry table at byte
 *     offset filterposx*3.  Both positions are taken from the
 *     FILTER_SHIFT/FILTER_MASK bits of Y and X.
 *   - For each of four successive source rows (starting at sPtr, stepping
 *     by srcYStride elements) three 8-byte values rowN0..rowN2 are
 *     assembled from four raw reads with vis_alignaddr/vis_faligndata.
 *
 * On exit sPtr is left on the fourth row, data0..data3 hold the raw reads
 * of that row, and X/Y have been advanced by dX/dY.
 */
#define LOAD_BC_S16_3CH_1PIXEL(mlib_filters_s16_3, mlib_filters_s16_4)    \
  /* coefficient lookup (uses X and Y before they are advanced) */        \
  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK;                         \
  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK;                         \
  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3));    \
  xFilter0 = xPtr[0];                                                     \
  xFilter1 = xPtr[1];                                                     \
  xFilter2 = xPtr[2];                                                     \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4));   \
  yFilter0 = yPtr[0];                                                     \
  yFilter1 = yPtr[1];                                                     \
  yFilter2 = yPtr[2];                                                     \
  yFilter3 = yPtr[3];                                                     \
  /* source row 0 */                                                      \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  data2 = dpSrc[2];                                                       \
  data3 = dpSrc[3];                                                       \
  row00 = vis_faligndata(data0, data1);                                   \
  row01 = vis_faligndata(data1, data2);                                   \
  row02 = vis_faligndata(data2, data3);                                   \
  /* source row 1 */                                                      \
  sPtr += srcYStride;                                                     \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  data2 = dpSrc[2];                                                       \
  data3 = dpSrc[3];                                                       \
  row10 = vis_faligndata(data0, data1);                                   \
  row11 = vis_faligndata(data1, data2);                                   \
  row12 = vis_faligndata(data2, data3);                                   \
  /* source row 2 */                                                      \
  sPtr += srcYStride;                                                     \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  data2 = dpSrc[2];                                                       \
  data3 = dpSrc[3];                                                       \
  row20 = vis_faligndata(data0, data1);                                   \
  row21 = vis_faligndata(data1, data2);                                   \
  row22 = vis_faligndata(data2, data3);                                   \
  /* source row 3 */                                                      \
  sPtr += srcYStride;                                                     \
  dpSrc = vis_alignaddr(sPtr, 0);                                         \
  data0 = dpSrc[0];                                                       \
  data1 = dpSrc[1];                                                       \
  data2 = dpSrc[2];                                                       \
  data3 = dpSrc[3];                                                       \
  row30 = vis_faligndata(data0, data1);                                   \
  row31 = vis_faligndata(data1, data2);                                   \
  row32 = vis_faligndata(data2, data3);                                   \
  /* move on to the next destination pixel */                             \
  X += dX;                                                                \
  Y += dY
702
703 /***************************************************************/
/*
 * Write the first three 16-bit elements of f0.t to the destination and
 * advance dstPixelPtr by one 3-channel pixel.  The fourth element of f0.t
 * is not stored.
 */
#define STORE_BC_S16_3CH_1PIXEL()                               \
  *dstPixelPtr++ = f0.t[0];                                     \
  *dstPixelPtr++ = f0.t[1];                                     \
  *dstPixelPtr++ = f0.t[2]
709
710 /***************************************************************/
/*
 * Produce one filtered 3-channel pixel in f0.d from the state left by
 * LOAD_BC_S16_3CH_1PIXEL (rowN0..rowN2, yFilter0..yFilter3,
 * xFilter0..xFilter2).
 *
 * Vertical pass: each rowNk is XORed with mask8000 and multiplied by
 * yFilterN (vis_fmul8sux16 + vis_fmul8ulx16, added); the row terms are
 * accumulated in sum0, sum1 and sum2 (one per 8-byte column k).
 * Horizontal pass: sumk is multiplied by xFilterk giving d0, d1, d2; these
 * are then shifted against each other with vis_alignaddr/vis_faligndata
 * (byte offsets 6 and 2) and added so the partial products collapse into
 * d0, which is widened with vis_fmuld8sux16, packed with
 * vis_fpackfix_pair and XORed with mask8000 again.
 *
 * Side effects: changes the GSR alignment (vis_alignaddr with 6, then 2)
 * and clobbers u00..u21, v00..v32, sum0..sum2, d0..d4 and row30.
 */
711 #define RESULT_3BC_S16_1PIXEL() \
712 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \
713 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \
714 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \
715 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \
716 v00 = vis_fpadd16(u00, u01); \
717 u20 = vis_fmul8sux16(vis_fxor(row02, mask8000), yFilter0); \
718 v01 = vis_fpadd16(u10, u11); \
719 u21 = vis_fmul8ulx16(vis_fxor(row02, mask8000), yFilter0); \
720 u00 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \
721 u01 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \
722 v02 = vis_fpadd16(u20, u21); \
723 u10 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \
724 u11 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \
725 v10 = vis_fpadd16(u00, u01); \
726 u20 = vis_fmul8sux16(vis_fxor(row12, mask8000), yFilter1); \
727 u21 = vis_fmul8ulx16(vis_fxor(row12, mask8000), yFilter1); \
728 u00 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \
729 v11 = vis_fpadd16(u10, u11); \
730 u01 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \
731 v12 = vis_fpadd16(u20, u21); \
732 u10 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \
733 u11 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \
734 v20 = vis_fpadd16(u00, u01); \
735 u20 = vis_fmul8sux16(vis_fxor(row22, mask8000), yFilter2); \
736 sum0 = vis_fpadd16(v00, v10); \
737 u21 = vis_fmul8ulx16(vis_fxor(row22, mask8000), yFilter2); \
738 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \
739 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); /* last read of row30 in this macro */ \
740 v21 = vis_fpadd16(u10, u11); \
741 sum1 = vis_fpadd16(v01, v11); \
742 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \
743 sum2 = vis_fpadd16(v02, v12); \
744 v22 = vis_fpadd16(u20, u21); \
745 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \
746 sum0 = vis_fpadd16(sum0, v20); \
747 u20 = vis_fmul8sux16(vis_fxor(row32, mask8000), yFilter3); \
748 v30 = vis_fpadd16(u00, u01); \
749 sum1 = vis_fpadd16(sum1, v21); \
750 u21 = vis_fmul8ulx16(vis_fxor(row32, mask8000), yFilter3); \
751 v31 = vis_fpadd16(u10, u11); \
752 sum2 = vis_fpadd16(sum2, v22); \
753 v32 = vis_fpadd16(u20, u21); \
754 sum0 = vis_fpadd16(sum0, v30); \
755 row30 = vis_faligndata(data0, data1); /* NOTE(review): row30 is not read again in this macro; this looks like a leftover from the pipelined variant -- confirm before removing */ \
756 v00 = vis_fmul8sux16(sum0, xFilter0); \
757 sum1 = vis_fpadd16(sum1, v31); \
758 sum2 = vis_fpadd16(sum2, v32); \
759 v01 = vis_fmul8ulx16(sum0, xFilter0); \
760 v10 = vis_fmul8sux16(sum1, xFilter1); \
761 v11 = vis_fmul8ulx16(sum1, xFilter1); \
762 d0 = vis_fpadd16(v00, v01); /* sum0 * xFilter0 */ \
763 v20 = vis_fmul8sux16(sum2, xFilter2); \
764 v21 = vis_fmul8ulx16(sum2, xFilter2); \
765 d1 = vis_fpadd16(v10, v11); /* sum1 * xFilter1 */ \
766 d2 = vis_fpadd16(v20, v21); /* sum2 * xFilter2 */ \
767 vis_alignaddr((void*)6, 0); /* GSR alignment = 6 for the next vis_faligndata */ \
768 d3 = vis_faligndata(d0, d1); \
769 vis_alignaddr((void*)2, 0); /* GSR alignment = 2 for the following vis_faligndata calls */ \
770 d4 = vis_faligndata(d1, d2); \
771 d0 = vis_fpadd16(d0, d3); \
772 d2 = vis_fpadd16(d2, d4); \
773 d1 = vis_faligndata(d2, d2); \
774 d0 = vis_fpadd16(d0, d1); /* all partial products combined */ \
775 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
776 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
777 f0.d = vis_fxor(vis_fpackfix_pair(d2, d3), mask8000)
778
779 /***************************************************************/
/*
 * One step of the hand-interleaved 3-channel pipeline.
 *
 * On entry rowN0..rowN2, yFilter0..yFilter3 and xFilter0..xFilter2 describe
 * an already loaded pixel, sPtr addresses the next pixel's source data, and
 * X/Y are that next pixel's coordinates.  The macro interleaves:
 *   - filtering the loaded pixel (same arithmetic as RESULT_3BC_S16_1PIXEL
 *     up to the three x products), leaving them in d0, d1 and d2;
 *   - loading rowN0..rowN2 and the filter coefficients of the next pixel;
 *   - advancing X/Y by dX/dY and setting xSrc, ySrc and sPtr for the pixel
 *     after that.
 *
 * Unlike the 1- and 2-channel variants there is no index argument: the
 * outputs always land in d0, d1, d2.  Each row / filter variable is
 * overwritten only after its last use for the current pixel, so the
 * statement order must be preserved.
 */
780 #define BC_S16_3CH(mlib_filters_s16_3, mlib_filters_s16_4) \
781 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \
782 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \
783 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \
784 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \
785 v00 = vis_fpadd16(u00, u01); \
786 u20 = vis_fmul8sux16(vis_fxor(row02, mask8000), yFilter0); \
787 v01 = vis_fpadd16(u10, u11); \
788 u21 = vis_fmul8ulx16(vis_fxor(row02, mask8000), yFilter0); /* last use of the old row00..row02 */ \
789 dpSrc = vis_alignaddr(sPtr, 0); /* start fetching the next pixel's rows */ \
790 u00 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \
791 u01 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \
792 data0 = dpSrc[0]; \
793 filterposy = (Y >> FILTER_SHIFT); /* taken before Y is advanced; masked further down */ \
794 v02 = vis_fpadd16(u20, u21); \
795 data1 = dpSrc[1]; \
796 u10 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \
797 data2 = dpSrc[2]; \
798 u11 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \
799 v10 = vis_fpadd16(u00, u01); \
800 data3 = dpSrc[3]; \
801 u20 = vis_fmul8sux16(vis_fxor(row12, mask8000), yFilter1); \
802 row00 = vis_faligndata(data0, data1); \
803 u21 = vis_fmul8ulx16(vis_fxor(row12, mask8000), yFilter1); \
804 row01 = vis_faligndata(data1, data2); \
805 u00 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \
806 row02 = vis_faligndata(data2, data3); \
807 filterposx = (X >> FILTER_SHIFT); /* taken before X is advanced; masked further down */ \
808 sPtr += srcYStride; \
809 dpSrc = vis_alignaddr(sPtr, 0); \
810 v11 = vis_fpadd16(u10, u11); \
811 u01 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \
812 v12 = vis_fpadd16(u20, u21); \
813 data0 = dpSrc[0]; \
814 u10 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \
815 X += dX; \
816 data1 = dpSrc[1]; \
817 u11 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \
818 v20 = vis_fpadd16(u00, u01); \
819 data2 = dpSrc[2]; \
820 u20 = vis_fmul8sux16(vis_fxor(row22, mask8000), yFilter2); \
821 sum0 = vis_fpadd16(v00, v10); \
822 data3 = dpSrc[3]; \
823 row10 = vis_faligndata(data0, data1); \
824 u21 = vis_fmul8ulx16(vis_fxor(row22, mask8000), yFilter2); \
825 row11 = vis_faligndata(data1, data2); \
826 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \
827 row12 = vis_faligndata(data2, data3); \
828 sPtr += srcYStride; \
829 dpSrc = vis_alignaddr(sPtr, 0); \
830 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \
831 v21 = vis_fpadd16(u10, u11); \
832 Y += dY; \
833 xSrc = (X >> MLIB_SHIFT)-1; /* from the advanced X: column of the pixel after the one being loaded */ \
834 sum1 = vis_fpadd16(v01, v11); \
835 data0 = dpSrc[0]; \
836 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \
837 sum2 = vis_fpadd16(v02, v12); \
838 ySrc = (Y >> MLIB_SHIFT)-1; /* from the advanced Y */ \
839 data1 = dpSrc[1]; \
840 v22 = vis_fpadd16(u20, u21); \
841 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \
842 data2 = dpSrc[2]; \
843 sum0 = vis_fpadd16(sum0, v20); \
844 u20 = vis_fmul8sux16(vis_fxor(row32, mask8000), yFilter3); \
845 data3 = dpSrc[3]; \
846 v30 = vis_fpadd16(u00, u01); \
847 filterposy &= FILTER_MASK; \
848 row20 = vis_faligndata(data0, data1); \
849 sum1 = vis_fpadd16(sum1, v21); \
850 u21 = vis_fmul8ulx16(vis_fxor(row32, mask8000), yFilter3); /* last use of the old row30..row32 */ \
851 row21 = vis_faligndata(data1, data2); \
852 row22 = vis_faligndata(data2, data3); \
853 sPtr += srcYStride; \
854 filterposx &= FILTER_MASK; \
855 v31 = vis_fpadd16(u10, u11); \
856 dpSrc = vis_alignaddr(sPtr, 0); \
857 data0 = dpSrc[0]; \
858 sum2 = vis_fpadd16(sum2, v22); \
859 data1 = dpSrc[1]; \
860 v32 = vis_fpadd16(u20, u21); \
861 data2 = dpSrc[2]; \
862 sum0 = vis_fpadd16(sum0, v30); \
863 data3 = dpSrc[3]; \
864 row30 = vis_faligndata(data0, data1); \
865 v00 = vis_fmul8sux16(sum0, xFilter0); \
866 row31 = vis_faligndata(data1, data2); \
867 row32 = vis_faligndata(data2, data3); \
868 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
869 sum1 = vis_fpadd16(sum1, v31); \
870 yFilter0 = yPtr[0]; \
871 sum2 = vis_fpadd16(sum2, v32); \
872 v01 = vis_fmul8ulx16(sum0, xFilter0); /* last use of the old xFilter0 */ \
873 yFilter1 = yPtr[1]; \
874 v10 = vis_fmul8sux16(sum1, xFilter1); \
875 yFilter2 = yPtr[2]; \
876 v11 = vis_fmul8ulx16(sum1, xFilter1); /* last use of the old xFilter1 */ \
877 d0 = vis_fpadd16(v00, v01); /* sum0 * xFilter0 */ \
878 yFilter3 = yPtr[3]; \
879 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3)); \
880 v20 = vis_fmul8sux16(sum2, xFilter2); \
881 xFilter0 = xPtr[0]; \
882 v21 = vis_fmul8ulx16(sum2, xFilter2); /* last use of the old xFilter2 */ \
883 d1 = vis_fpadd16(v10, v11); /* sum1 * xFilter1 */ \
884 xFilter1 = xPtr[1]; \
885 d2 = vis_fpadd16(v20, v21); /* sum2 * xFilter2 */ \
886 xFilter2 = xPtr[2]; \
887 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc*3)
888
889 /***************************************************************/
/*
 * FADD_3BC_S16: final reduction step for one 3-channel pixel.
 *
 * Input:    d0, d1, d2 - the three 4 x 16-bit product vectors produced by
 *           the horizontal multiply stage (sumN * xFilterN).
 * Output:   f0.d - the packed result after the XOR with mask8000.
 * Clobbers: d0..d4 and the GSR alignment offset (vis_alignaddr is called
 *           with the constants 6 and 2 purely for its effect on GSR).
 *
 * The realign/add sequence folds 16-bit lanes that sit at different
 * positions of d0:d1:d2 onto each other.  That this yields a per-channel
 * sum depends on the lane layout of the "*_3" filter tables, which are not
 * defined in this part of the file - NOTE(review): confirm against the
 * filter table definitions.
 *
 * The 16-bit sums are then widened with vis_fmuld8sux16 and packed back to
 * 16 bit with vis_fpackfix_pair; the pack scaling is taken from GSR, which
 * the callers set with vis_write_gsr(10 << 3).
 */
890 #define FADD_3BC_S16() \
891 vis_alignaddr((void*)6, 0); /* GSR align offset = 6 */ \
892 d3 = vis_faligndata(d0, d1); /* bytes 6..13 of d0:d1 */ \
893 vis_alignaddr((void*)2, 0); /* GSR align offset = 2 */ \
894 d4 = vis_faligndata(d1, d2); /* bytes 2..9 of d1:d2 */ \
895 d0 = vis_fpadd16(d0, d3); \
896 d2 = vis_fpadd16(d2, d4); \
897 d1 = vis_faligndata(d2, d2); /* d2 rotated left by one 16-bit lane */ \
898 d0 = vis_fpadd16(d0, d1); \
899 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); /* widen upper two lanes */ \
900 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); /* widen lower two lanes */ \
901 f0.d = vis_fxor(vis_fpackfix_pair(d2, d3), mask8000) /* pack to 16 bit, XOR with 0x8000 per lane */
902
903 /***************************************************************/
/*
 * mlib_ImageAffine_u16_3ch_bc
 *
 * Bicubic affine resampling of a 3-channel image with 16-bit samples,
 * processed one destination scanline at a time (j = yStart .. yFinish).
 *
 * param - affine parameter block.  It is consumed by DECLAREVAR_BC(), which
 *         is defined outside this part of the file and is expected to
 *         declare the working variables used below (filter, srcYStride,
 *         lineAddr, X, Y, dX, dY, xLeft, xRight, yStart, yFinish, j, ...).
 *         NOTE(review): confirm in mlib_ImageAffine.h.
 *
 * Returns MLIB_SUCCESS unconditionally.
 *
 * Rows with at least four pixels go through a software-pipelined path in
 * which BC_S16_3CH computes one pixel while loading the next; shorter rows
 * (and only those, see "i += 4") use the plain one-pixel-at-a-time loop.
 * Many locals declared here are never named in the function body itself;
 * they are read and written by the macros.
 */
mlib_ImageAffine_u16_3ch_bc(mlib_affine_param * param)904 mlib_status mlib_ImageAffine_u16_3ch_bc (mlib_affine_param *param)
905 {
906 DECLAREVAR_BC();
907 mlib_s32 filterposx, filterposy;
908 mlib_d64 data0, data1, data2, data3;
909 mlib_d64 sum0, sum1, sum2;
910 mlib_d64 row00, row10, row20, row30;
911 mlib_d64 row01, row11, row21, row31;
912 mlib_d64 row02, row12, row22, row32;
913 mlib_d64 xFilter0, xFilter1, xFilter2;
914 mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
915 mlib_d64 v00, v01, v02, v10, v11, v12, v20, v21, v22, v30, v31, v32;
916 mlib_d64 u00, u01, u10, u11, u20, u21;
917 mlib_d64 d0, d1, d2, d3, d4;
918 mlib_d64 *yPtr, *xPtr;
919 mlib_d64 *dpSrc;
920 mlib_s32 cols, i;
/* Multiplier used with vis_fmuld8sux16 to widen 16-bit lanes to 32 bit. */
921 mlib_f32 f_x01000100 = vis_to_float(0x01000100);
/* 0x8000 in every 16-bit lane; XOR-ed onto the samples before the multiplies and onto the packed result. */
922 mlib_d64 mask8000 = vis_to_double_dup(0x80008000);
/* f0.d receives the packed result from FADD_3BC_S16; the t[] view gives 16-bit access to the same bytes. */
923 union {
924 mlib_s16 t[4];
925 mlib_d64 d;
926 } f0;
927 const mlib_s16 *mlib_filters_table_3;
928 const mlib_s16 *mlib_filters_table_4;
929
/* Coefficient tables: the "bc" pair for MLIB_BICUBIC, the "bc2" pair for any other filter value. */
930 if (filter == MLIB_BICUBIC) {
931 mlib_filters_table_3 = mlib_filters_s16_bc_3;
932 mlib_filters_table_4 = mlib_filters_s16_bc_4;
933 } else {
934 mlib_filters_table_3 = mlib_filters_s16_bc2_3;
935 mlib_filters_table_4 = mlib_filters_s16_bc2_4;
936 }
937
/* Halve the stride: the row pointer is an mlib_s16 pointer. Presumably srcYStride arrives in bytes - confirm. */
938 srcYStride >>= 1;
939
940 for (j = yStart; j <= yFinish; j++) {
941
/* Write GSR for this row; 10 << 3 places 10 in the scale field used by the pack step. */
942 vis_write_gsr(10 << 3);
943
/* CLIP is defined elsewhere; it is expected to set up xLeft/xRight, X/Y and the destination pointer for row j - confirm. */
944 CLIP(3);
945
946 cols = xRight - xLeft + 1;
947
948 i = 0;
949
/* Pipelined path, needs at least 4 pixels. */
950 if (i <= cols - 4) {
951
/* Prologue: load pixel 0, then point sPtr at pixel 1. */
952 NEXT_PIXEL_3BC_S16();
953 LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
954
955 NEXT_PIXEL_3BC_S16();
956
/* Multiply pixel 0 while loading pixel 1, reduce pixel 0 into f0, then multiply pixel 1 while loading pixel 2. */
957 BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
958 FADD_3BC_S16();
959
960 BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
961
/* Steady state: store one finished pixel, reduce the next, start another (cols - 4 iterations). */
962 #pragma pipeloop(0)
963 for (; i < cols-4; i++) {
964 STORE_BC_S16_3CH_1PIXEL();
965
966 FADD_3BC_S16();
967 BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
968 }
969
/* Epilogue: drain the four pixels still in flight without reading past the last source pixel. */
970 STORE_BC_S16_3CH_1PIXEL();
971
972 FADD_3BC_S16();
973 STORE_BC_S16_3CH_1PIXEL();
974
975 RESULT_3BC_S16_1PIXEL();
976 STORE_BC_S16_3CH_1PIXEL();
977
978 LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
979 RESULT_3BC_S16_1PIXEL();
980 STORE_BC_S16_3CH_1PIXEL();
/* The loop left i == cols - 4, so i is now cols and the loop below is skipped. */
981 i += 4;
982 }
983
/* Unpipelined path, one pixel per iteration; only runs when cols < 4. */
984 for (; i < cols; i++) {
985 NEXT_PIXEL_3BC_S16();
986 LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
987 RESULT_3BC_S16_1PIXEL();
988 STORE_BC_S16_3CH_1PIXEL();
989 }
990 }
991
992 return MLIB_SUCCESS;
993 }
994
995 /***************************************************************/
/*
 * NEXT_PIXEL_4BC_S16: locate the source data for the current (X, Y).
 *
 * Sets xSrc and ySrc to the integer parts of X and Y (X, Y >> MLIB_SHIFT)
 * minus one, then points sPtr (srcPixelPtr) at element 4 * xSrc of source
 * line ySrc, counted in mlib_s16 units (4 samples per pixel).
 * Reads X, Y and lineAddr; does not modify X or Y.
 */
996 #define NEXT_PIXEL_4BC_S16() \
997 xSrc = (X >> MLIB_SHIFT)-1; \
998 ySrc = (Y >> MLIB_SHIFT)-1; \
999 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2)
1000
1001 /***************************************************************/
/*
 * LOAD_BC_S16_4CH_1PIXEL: fetch everything needed to filter one 4-channel
 * pixel, without doing any arithmetic on it.
 *
 * mlib_filters_s16_4 - coefficient table; indexed with a byte offset of
 *                      filterpos * 4 and read as four mlib_d64 values.
 *
 * For each of four consecutive source rows (sPtr advances by srcYStride
 * three times and is left on the fourth row) five aligned 8-byte words are
 * read and realigned with vis_faligndata into four vectors rowR0..rowR3,
 * i.e. 16 mlib_s16 values starting at sPtr.
 * Then yFilter0..3 and xFilter0..3 are loaded from the table using the
 * FILTER_SHIFT / FILTER_MASK bits of Y and X, and finally X and Y are
 * advanced by dX and dY.
 *
 * Side effects: changes the GSR alignment offset (vis_alignaddr), sPtr,
 * dpSrc, data0..4, filterposx/y, xPtr/yPtr, X and Y.
 */
1002 #define LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) \
1003 dpSrc = vis_alignaddr(sPtr, 0); /* source row 0 */ \
1004 data0 = dpSrc[0]; \
1005 data1 = dpSrc[1]; \
1006 data2 = dpSrc[2]; \
1007 data3 = dpSrc[3]; \
1008 data4 = dpSrc[4]; \
1009 row00 = vis_faligndata(data0, data1); \
1010 row01 = vis_faligndata(data1, data2); \
1011 row02 = vis_faligndata(data2, data3); \
1012 row03 = vis_faligndata(data3, data4); \
1013 sPtr += srcYStride; \
1014 dpSrc = vis_alignaddr(sPtr, 0); /* source row 1 */ \
1015 data0 = dpSrc[0]; \
1016 data1 = dpSrc[1]; \
1017 data2 = dpSrc[2]; \
1018 data3 = dpSrc[3]; \
1019 data4 = dpSrc[4]; \
1020 row10 = vis_faligndata(data0, data1); \
1021 row11 = vis_faligndata(data1, data2); \
1022 row12 = vis_faligndata(data2, data3); \
1023 row13 = vis_faligndata(data3, data4); \
1024 sPtr += srcYStride; \
1025 dpSrc = vis_alignaddr(sPtr, 0); /* source row 2 */ \
1026 data0 = dpSrc[0]; \
1027 data1 = dpSrc[1]; \
1028 data2 = dpSrc[2]; \
1029 data3 = dpSrc[3]; \
1030 data4 = dpSrc[4]; \
1031 row20 = vis_faligndata(data0, data1); \
1032 row21 = vis_faligndata(data1, data2); \
1033 row22 = vis_faligndata(data2, data3); \
1034 row23 = vis_faligndata(data3, data4); \
1035 sPtr += srcYStride; \
1036 dpSrc = vis_alignaddr(sPtr, 0); /* source row 3 */ \
1037 data0 = dpSrc[0]; \
1038 data1 = dpSrc[1]; \
1039 data2 = dpSrc[2]; \
1040 data3 = dpSrc[3]; \
1041 data4 = dpSrc[4]; \
1042 row30 = vis_faligndata(data0, data1); \
1043 row31 = vis_faligndata(data1, data2); \
1044 row32 = vis_faligndata(data2, data3); \
1045 row33 = vis_faligndata(data3, data4); \
1046 filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; /* vertical coefficients */ \
1047 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
1048 yFilter0 = yPtr[0]; \
1049 yFilter1 = yPtr[1]; \
1050 yFilter2 = yPtr[2]; \
1051 yFilter3 = yPtr[3]; \
1052 filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; /* horizontal coefficients */ \
1053 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \
1054 xFilter0 = xPtr[0]; \
1055 xFilter1 = xPtr[1]; \
1056 xFilter2 = xPtr[2]; \
1057 xFilter3 = xPtr[3]; \
1058 X += dX; /* step to the next destination pixel */ \
1059 Y += dY
1060
1061 /***************************************************************/
/*
 * RESULT_4BC_S16_1PIXEL: compute one 4-channel output pixel from data that
 * is already in registers.
 *
 * Input:    row00..row33, yFilter0..3, xFilter0..3 (as left by
 *           LOAD_BC_S16_4CH_1PIXEL or BC_S16_4CH), mask8000, f_x01000100.
 * Output:   res - the packed 4 x 16-bit result after the XOR with mask8000.
 * Clobbers: u00..u31, v00..v33, sum0..sum3, d0..d3.
 * Performs no memory access and does not touch X, Y, sPtr or the GSR
 * alignment offset.
 *
 * Steps:
 *   1. Vertical pass: every row vector is XOR-ed with mask8000 and
 *      multiplied by the yFilter of its row; the four rows of each column
 *      vector are accumulated into sum0..sum3.  Each
 *      vis_fmul8sux16 / vis_fmul8ulx16 pair added with vis_fpadd16 forms
 *      one 16 x 16-bit lane product.
 *   2. Horizontal pass: sumK is multiplied by xFilterK giving d0..d3.
 *   3. d0..d3 are added, widened with vis_fmuld8sux16 and packed back to
 *      16 bit with vis_fpackfix_pair (scaling taken from GSR).
 *
 * The multiplies and adds are interleaved for scheduling; only the data
 * dependencies between them matter for the result.
 */
1062 #define RESULT_4BC_S16_1PIXEL() \
1063 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \
1064 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \
1065 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \
1066 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \
1067 v00 = vis_fpadd16(u00, u01); \
1068 u20 = vis_fmul8sux16(vis_fxor(row02, mask8000), yFilter0); \
1069 v01 = vis_fpadd16(u10, u11); \
1070 u21 = vis_fmul8ulx16(vis_fxor(row02, mask8000), yFilter0); \
1071 u30 = vis_fmul8sux16(vis_fxor(row03, mask8000), yFilter0); \
1072 u31 = vis_fmul8ulx16(vis_fxor(row03, mask8000), yFilter0); \
1073 v02 = vis_fpadd16(u20, u21); \
1074 u00 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \
1075 u01 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \
1076 v03 = vis_fpadd16(u30, u31); \
1077 u10 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \
1078 u11 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \
1079 v10 = vis_fpadd16(u00, u01); \
1080 u20 = vis_fmul8sux16(vis_fxor(row12, mask8000), yFilter1); \
1081 v11 = vis_fpadd16(u10, u11); \
1082 u21 = vis_fmul8ulx16(vis_fxor(row12, mask8000), yFilter1); \
1083 u30 = vis_fmul8sux16(vis_fxor(row13, mask8000), yFilter1); \
1084 u31 = vis_fmul8ulx16(vis_fxor(row13, mask8000), yFilter1); \
1085 u00 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \
1086 v12 = vis_fpadd16(u20, u21); \
1087 u01 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \
1088 v13 = vis_fpadd16(u30, u31); \
1089 u10 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \
1090 u11 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \
1091 v20 = vis_fpadd16(u00, u01); \
1092 u20 = vis_fmul8sux16(vis_fxor(row22, mask8000), yFilter2); \
1093 sum0 = vis_fpadd16(v00, v10); \
1094 u21 = vis_fmul8ulx16(vis_fxor(row22, mask8000), yFilter2); \
1095 u30 = vis_fmul8sux16(vis_fxor(row23, mask8000), yFilter2); \
1096 u31 = vis_fmul8ulx16(vis_fxor(row23, mask8000), yFilter2); \
1097 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \
1098 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \
1099 v21 = vis_fpadd16(u10, u11); \
1100 sum1 = vis_fpadd16(v01, v11); \
1101 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \
1102 sum2 = vis_fpadd16(v02, v12); \
1103 sum3 = vis_fpadd16(v03, v13); \
1104 v22 = vis_fpadd16(u20, u21); \
1105 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \
1106 sum0 = vis_fpadd16(sum0, v20); \
1107 u20 = vis_fmul8sux16(vis_fxor(row32, mask8000), yFilter3); \
1108 u21 = vis_fmul8ulx16(vis_fxor(row32, mask8000), yFilter3); \
1109 v23 = vis_fpadd16(u30, u31); \
1110 v30 = vis_fpadd16(u00, u01); \
1111 sum1 = vis_fpadd16(sum1, v21); \
1112 u30 = vis_fmul8sux16(vis_fxor(row33, mask8000), yFilter3); \
1113 u31 = vis_fmul8ulx16(vis_fxor(row33, mask8000), yFilter3); \
1114 v31 = vis_fpadd16(u10, u11); \
1115 sum2 = vis_fpadd16(sum2, v22); \
1116 sum3 = vis_fpadd16(sum3, v23); \
1117 v32 = vis_fpadd16(u20, u21); \
1118 sum0 = vis_fpadd16(sum0, v30); \
1119 v33 = vis_fpadd16(u30, u31); \
1120 v00 = vis_fmul8sux16(sum0, xFilter0); /* horizontal pass starts here */ \
1121 sum1 = vis_fpadd16(sum1, v31); \
1122 sum2 = vis_fpadd16(sum2, v32); \
1123 v01 = vis_fmul8ulx16(sum0, xFilter0); \
1124 v10 = vis_fmul8sux16(sum1, xFilter1); \
1125 sum3 = vis_fpadd16(sum3, v33); \
1126 v11 = vis_fmul8ulx16(sum1, xFilter1); \
1127 d0 = vis_fpadd16(v00, v01); \
1128 v20 = vis_fmul8sux16(sum2, xFilter2); \
1129 v21 = vis_fmul8ulx16(sum2, xFilter2); \
1130 d1 = vis_fpadd16(v10, v11); \
1131 v30 = vis_fmul8sux16(sum3, xFilter3); \
1132 v31 = vis_fmul8ulx16(sum3, xFilter3); \
1133 d2 = vis_fpadd16(v20, v21); \
1134 d3 = vis_fpadd16(v30, v31); \
1135 d0 = vis_fpadd16(d0, d1); /* reduce d0..d3 to a single vector */ \
1136 d2 = vis_fpadd16(d2, d3); \
1137 d0 = vis_fpadd16(d0, d2); \
1138 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); /* widen upper two lanes */ \
1139 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); /* widen lower two lanes */ \
1140 res = vis_fxor(vis_fpackfix_pair(d2, d3), mask8000) /* pack to 16 bit, XOR with 0x8000 per lane */
1141
1142 /***************************************************************/
/*
 * BC_S16_4CH: software-pipelined stage for 4-channel pixels.  It performs
 * the multiplies for the pixel currently held in registers while loading
 * everything for the following pixel.
 *
 * mlib_filters_s16_4 - coefficient table; indexed with a byte offset of
 *                      filterpos * 4 and read as four mlib_d64 values.
 *
 * On entry: row00..row33, xFilter0..3, yFilter0..3 describe pixel n;
 *           sPtr addresses the source data of pixel n + 1 and X, Y are the
 *           coordinates of pixel n + 1.
 * On exit:  d0..d3 hold the four horizontal products of pixel n (they are
 *           NOT yet added together - FADD_4BC_S16 does that);
 *           rows and filters describe pixel n + 1;
 *           X, Y have been advanced once more by dX, dY and sPtr, xSrc,
 *           ySrc address pixel n + 2.
 *
 * Side effects: changes the GSR alignment offset (vis_alignaddr is called
 * for each of the four source rows), dpSrc, data0..4, filterposx/y,
 * xPtr/yPtr and all u/v/sum temporaries.
 *
 * Ordering: every rowRC, yFilterR and xFilterK register is overwritten with
 * the next pixel's value only after its last use for the current pixel, and
 * filterposx/y are taken from X, Y before those are advanced.  Statements
 * must not be reordered across these dependencies.
 */
1143 #define BC_S16_4CH(mlib_filters_s16_4) \
1144 u00 = vis_fmul8sux16(vis_fxor(row00, mask8000), yFilter0); \
1145 u01 = vis_fmul8ulx16(vis_fxor(row00, mask8000), yFilter0); \
1146 u10 = vis_fmul8sux16(vis_fxor(row01, mask8000), yFilter0); \
1147 u11 = vis_fmul8ulx16(vis_fxor(row01, mask8000), yFilter0); \
1148 v00 = vis_fpadd16(u00, u01); \
1149 u20 = vis_fmul8sux16(vis_fxor(row02, mask8000), yFilter0); \
1150 v01 = vis_fpadd16(u10, u11); \
1151 u21 = vis_fmul8ulx16(vis_fxor(row02, mask8000), yFilter0); \
1152 u30 = vis_fmul8sux16(vis_fxor(row03, mask8000), yFilter0); \
1153 u31 = vis_fmul8ulx16(vis_fxor(row03, mask8000), yFilter0); \
1154 v02 = vis_fpadd16(u20, u21); \
1155 dpSrc = vis_alignaddr(sPtr, 0); /* next pixel, source row 0 */ \
1156 u00 = vis_fmul8sux16(vis_fxor(row10, mask8000), yFilter1); \
1157 u01 = vis_fmul8ulx16(vis_fxor(row10, mask8000), yFilter1); \
1158 data0 = dpSrc[0]; \
1159 filterposy = (Y >> FILTER_SHIFT); /* masked further down */ \
1160 v03 = vis_fpadd16(u30, u31); \
1161 data1 = dpSrc[1]; \
1162 u10 = vis_fmul8sux16(vis_fxor(row11, mask8000), yFilter1); \
1163 data2 = dpSrc[2]; \
1164 u11 = vis_fmul8ulx16(vis_fxor(row11, mask8000), yFilter1); \
1165 v10 = vis_fpadd16(u00, u01); \
1166 data3 = dpSrc[3]; \
1167 u20 = vis_fmul8sux16(vis_fxor(row12, mask8000), yFilter1); \
1168 v11 = vis_fpadd16(u10, u11); \
1169 data4 = dpSrc[4]; \
1170 u21 = vis_fmul8ulx16(vis_fxor(row12, mask8000), yFilter1); \
1171 row00 = vis_faligndata(data0, data1); \
1172 u30 = vis_fmul8sux16(vis_fxor(row13, mask8000), yFilter1); \
1173 row01 = vis_faligndata(data1, data2); \
1174 u31 = vis_fmul8ulx16(vis_fxor(row13, mask8000), yFilter1); \
1175 row02 = vis_faligndata(data2, data3); \
1176 u00 = vis_fmul8sux16(vis_fxor(row20, mask8000), yFilter2); \
1177 row03 = vis_faligndata(data3, data4); \
1178 filterposx = (X >> FILTER_SHIFT); /* masked further down */ \
1179 sPtr += srcYStride; \
1180 v12 = vis_fpadd16(u20, u21); \
1181 dpSrc = vis_alignaddr(sPtr, 0); /* next pixel, source row 1 */ \
1182 u01 = vis_fmul8ulx16(vis_fxor(row20, mask8000), yFilter2); \
1183 v13 = vis_fpadd16(u30, u31); \
1184 data0 = dpSrc[0]; \
1185 u10 = vis_fmul8sux16(vis_fxor(row21, mask8000), yFilter2); \
1186 X += dX; \
1187 data1 = dpSrc[1]; \
1188 u11 = vis_fmul8ulx16(vis_fxor(row21, mask8000), yFilter2); \
1189 v20 = vis_fpadd16(u00, u01); \
1190 data2 = dpSrc[2]; \
1191 u20 = vis_fmul8sux16(vis_fxor(row22, mask8000), yFilter2); \
1192 sum0 = vis_fpadd16(v00, v10); \
1193 data3 = dpSrc[3]; \
1194 u21 = vis_fmul8ulx16(vis_fxor(row22, mask8000), yFilter2); \
1195 data4 = dpSrc[4]; \
1196 row10 = vis_faligndata(data0, data1); \
1197 u30 = vis_fmul8sux16(vis_fxor(row23, mask8000), yFilter2); \
1198 row11 = vis_faligndata(data1, data2); \
1199 u31 = vis_fmul8ulx16(vis_fxor(row23, mask8000), yFilter2); \
1200 row12 = vis_faligndata(data2, data3); \
1201 u00 = vis_fmul8sux16(vis_fxor(row30, mask8000), yFilter3); \
1202 row13 = vis_faligndata(data3, data4); \
1203 sPtr += srcYStride; \
1204 dpSrc = vis_alignaddr(sPtr, 0); /* next pixel, source row 2 */ \
1205 u01 = vis_fmul8ulx16(vis_fxor(row30, mask8000), yFilter3); \
1206 v21 = vis_fpadd16(u10, u11); \
1207 Y += dY; \
1208 xSrc = (X >> MLIB_SHIFT)-1; /* coordinates of the pixel after next */ \
1209 sum1 = vis_fpadd16(v01, v11); \
1210 data0 = dpSrc[0]; \
1211 u10 = vis_fmul8sux16(vis_fxor(row31, mask8000), yFilter3); \
1212 sum2 = vis_fpadd16(v02, v12); \
1213 sum3 = vis_fpadd16(v03, v13); \
1214 ySrc = (Y >> MLIB_SHIFT)-1; \
1215 data1 = dpSrc[1]; \
1216 v22 = vis_fpadd16(u20, u21); \
1217 u11 = vis_fmul8ulx16(vis_fxor(row31, mask8000), yFilter3); \
1218 data2 = dpSrc[2]; \
1219 sum0 = vis_fpadd16(sum0, v20); \
1220 u20 = vis_fmul8sux16(vis_fxor(row32, mask8000), yFilter3); \
1221 data3 = dpSrc[3]; \
1222 u21 = vis_fmul8ulx16(vis_fxor(row32, mask8000), yFilter3); \
1223 v23 = vis_fpadd16(u30, u31); \
1224 data4 = dpSrc[4]; \
1225 v30 = vis_fpadd16(u00, u01); \
1226 filterposy &= FILTER_MASK; \
1227 row20 = vis_faligndata(data0, data1); \
1228 sum1 = vis_fpadd16(sum1, v21); \
1229 u30 = vis_fmul8sux16(vis_fxor(row33, mask8000), yFilter3); \
1230 row21 = vis_faligndata(data1, data2); \
1231 u31 = vis_fmul8ulx16(vis_fxor(row33, mask8000), yFilter3); \
1232 row22 = vis_faligndata(data2, data3); \
1233 row23 = vis_faligndata(data3, data4); \
1234 sPtr += srcYStride; \
1235 filterposx &= FILTER_MASK; \
1236 v31 = vis_fpadd16(u10, u11); \
1237 dpSrc = vis_alignaddr(sPtr, 0); /* next pixel, source row 3 */ \
1238 data0 = dpSrc[0]; \
1239 sum2 = vis_fpadd16(sum2, v22); \
1240 sum3 = vis_fpadd16(sum3, v23); \
1241 data1 = dpSrc[1]; \
1242 v32 = vis_fpadd16(u20, u21); \
1243 data2 = dpSrc[2]; \
1244 sum0 = vis_fpadd16(sum0, v30); \
1245 data3 = dpSrc[3]; \
1246 v33 = vis_fpadd16(u30, u31); \
1247 data4 = dpSrc[4]; \
1248 row30 = vis_faligndata(data0, data1); \
1249 v00 = vis_fmul8sux16(sum0, xFilter0); /* horizontal pass for the current pixel */ \
1250 row31 = vis_faligndata(data1, data2); \
1251 row32 = vis_faligndata(data2, data3); \
1252 row33 = vis_faligndata(data3, data4); \
1253 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
1254 sum1 = vis_fpadd16(sum1, v31); \
1255 yFilter0 = yPtr[0]; /* vertical coefficients of the next pixel */ \
1256 sum2 = vis_fpadd16(sum2, v32); \
1257 v01 = vis_fmul8ulx16(sum0, xFilter0); \
1258 yFilter1 = yPtr[1]; \
1259 v10 = vis_fmul8sux16(sum1, xFilter1); \
1260 sum3 = vis_fpadd16(sum3, v33); \
1261 yFilter2 = yPtr[2]; \
1262 v11 = vis_fmul8ulx16(sum1, xFilter1); \
1263 d0 = vis_fpadd16(v00, v01); \
1264 yFilter3 = yPtr[3]; \
1265 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \
1266 v20 = vis_fmul8sux16(sum2, xFilter2); \
1267 xFilter0 = xPtr[0]; /* each xFilterK is replaced only after its last use above */ \
1268 v21 = vis_fmul8ulx16(sum2, xFilter2); \
1269 d1 = vis_fpadd16(v10, v11); \
1270 xFilter1 = xPtr[1]; \
1271 v30 = vis_fmul8sux16(sum3, xFilter3); \
1272 v31 = vis_fmul8ulx16(sum3, xFilter3); \
1273 d2 = vis_fpadd16(v20, v21); \
1274 xFilter2 = xPtr[2]; \
1275 d3 = vis_fpadd16(v30, v31); \
1276 xFilter3 = xPtr[3]; \
1277 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2) /* source address of the pixel after next */
1278
1279 /***************************************************************/
/*
 * FADD_4BC_S16: final reduction step for one 4-channel pixel.
 *
 * Input:    d0..d3 - the four horizontal product vectors left by
 *           BC_S16_4CH.
 * Output:   res - the packed 4 x 16-bit result after the XOR with mask8000.
 * Clobbers: d0, d2, d3.
 *
 * Adds the four vectors lane by lane, widens the 16-bit sums with
 * vis_fmuld8sux16 and packs them back to 16 bit with vis_fpackfix_pair
 * (scaling taken from GSR).  This is the same sequence that ends
 * RESULT_4BC_S16_1PIXEL.
 */
1280 #define FADD_4BC_S16() \
1281 d0 = vis_fpadd16(d0, d1); \
1282 d2 = vis_fpadd16(d2, d3); \
1283 d0 = vis_fpadd16(d0, d2); \
1284 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); /* widen upper two lanes */ \
1285 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); /* widen lower two lanes */ \
1286 res = vis_fxor(vis_fpackfix_pair(d2, d3), mask8000) /* pack to 16 bit, XOR with 0x8000 per lane */
1287
1288 /***************************************************************/
/*
 * mlib_ImageAffine_u16_4ch_bc
 *
 * Bicubic affine resampling of a 4-channel image with 16-bit samples,
 * processed one destination scanline at a time (j = yStart .. yFinish).
 *
 * param - affine parameter block.  It is consumed by DECLAREVAR_BC(), which
 *         is defined outside this part of the file and is expected to
 *         declare the working variables used below (filter, srcYStride,
 *         lineAddr, dstData, dstPixelPtr, X, Y, dX, dY, xLeft, xRight,
 *         yStart, yFinish, j, ...).
 *         NOTE(review): confirm in mlib_ImageAffine.h.
 *
 * Returns MLIB_SUCCESS unconditionally.
 *
 * One pixel is exactly 8 bytes (4 x mlib_s16), so each result is written
 * with two partial stores: the lanes selected by "mask" go to the current
 * aligned 8-byte word and the complementary lanes to the next one.
 * Rows with at least four pixels use the software-pipelined path built from
 * BC_S16_4CH / FADD_4BC_S16; shorter rows use the plain loop at the end.
 */
mlib_ImageAffine_u16_4ch_bc(mlib_affine_param * param)1289 mlib_status mlib_ImageAffine_u16_4ch_bc (mlib_affine_param *param)
1290 {
1291 DECLAREVAR_BC();
1292 DTYPE *dstLineEnd;
1293 mlib_s32 filterposx, filterposy;
1294 mlib_d64 data0, data1, data2, data3, data4;
1295 mlib_d64 sum0, sum1, sum2, sum3;
1296 mlib_d64 row00, row10, row20, row30;
1297 mlib_d64 row01, row11, row21, row31;
1298 mlib_d64 row02, row12, row22, row32;
1299 mlib_d64 row03, row13, row23, row33;
1300 mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3;
1301 mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
1302 mlib_d64 v00, v01, v02, v03, v10, v11, v12, v13;
1303 mlib_d64 v20, v21, v22, v23, v30, v31, v32, v33;
1304 mlib_d64 u00, u01, u10, u11, u20, u21, u30, u31;
1305 mlib_d64 d0, d1, d2, d3;
1306 mlib_d64 *yPtr, *xPtr;
1307 mlib_d64 *dp, *dpSrc;
1308 mlib_s32 cols, i, mask, gsrd;
1309 mlib_d64 res;
/* Multiplier used with vis_fmuld8sux16 to widen 16-bit lanes to 32 bit. */
1310 mlib_f32 f_x01000100 = vis_to_float(0x01000100);
/* 0x8000 in every 16-bit lane; XOR-ed onto the samples before the multiplies and onto the packed result. */
1311 mlib_d64 mask8000 = vis_to_double_dup(0x80008000);
1312 const mlib_s16 *mlib_filters_table_4;
1313
/* Coefficient table: "bc" for MLIB_BICUBIC, "bc2" for any other filter value. */
1314 if (filter == MLIB_BICUBIC) {
1315 mlib_filters_table_4 = mlib_filters_s16_bc_4;
1316 } else {
1317 mlib_filters_table_4 = mlib_filters_s16_bc2_4;
1318 }
1319
/* Halve the stride: the row pointer is an mlib_s16 pointer. Presumably srcYStride arrives in bytes - confirm. */
1320 srcYStride >>= 1;
1321
1322 for (j = yStart; j <= yFinish; j++) {
1323
/* Write GSR for this row; 10 << 3 places 10 in the scale field used by the pack step. */
1324 vis_write_gsr(10 << 3);
1325
/* CLIP is defined elsewhere; it is expected to set up xLeft/xRight, X/Y and dstPixelPtr for row j - confirm. */
1326 CLIP(4);
1327 dstLineEnd = (DTYPE*)dstData + 4 * xRight;
1328
1329 cols = xRight - xLeft + 1;
/* dp: aligned 8-byte word containing the first destination sample. */
1330 dp = vis_alignaddr(dstPixelPtr, 0);
/* dstLineEnd now addresses the last sample (4th channel) of pixel xRight. */
1331 dstLineEnd += 3;
/* Lane mask for the part of each pixel that falls into the current aligned word. */
1332 mask = vis_edge16(dstPixelPtr, dstLineEnd);
/* Byte offset for vis_faligndata(res, res) so the rotated pixel lines up with the aligned words. */
1333 gsrd = ((8 - (mlib_addr)dstPixelPtr) & 7);
1334
1335 i = 0;
1336
/* Pipelined path, needs at least 4 pixels. */
1337 if (i <= cols - 4) {
1338
/* Prologue: load pixel 0, then point sPtr at pixel 1. */
1339 NEXT_PIXEL_4BC_S16();
1340 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4);
1341
1342 NEXT_PIXEL_4BC_S16();
1343
/* Multiply pixel 0 while loading pixel 1, reduce pixel 0 into res, then multiply pixel 1 while loading pixel 2. */
1344 BC_S16_4CH(mlib_filters_table_4);
1345 FADD_4BC_S16();
1346
1347 BC_S16_4CH(mlib_filters_table_4);
1348
/* Steady state: store one finished pixel, reduce the next, start another (cols - 4 iterations). */
1349 #pragma pipeloop(0)
1350 for (; i < cols-4; i++) {
/* The alignment offset must be set again before every store: the load macros call vis_alignaddr(sPtr, 0). */
1351 vis_alignaddr((void *)gsrd, 0);
1352 res = vis_faligndata(res, res);
1353
1354 vis_pst_16(res, dp++, mask);
1355 vis_pst_16(res, dp, ~mask);
1356
1357 FADD_4BC_S16();
1358 BC_S16_4CH(mlib_filters_table_4);
1359 }
1360
/* Epilogue: drain the four pixels still in flight without reading past the last source pixel. */
1361 vis_alignaddr((void *)gsrd, 0);
1362 res = vis_faligndata(res, res);
1363 vis_pst_16(res, dp++, mask);
1364 vis_pst_16(res, dp, ~mask);
1365
1366 FADD_4BC_S16();
1367 vis_alignaddr((void *)gsrd, 0);
1368 res = vis_faligndata(res, res);
1369 vis_pst_16(res, dp++, mask);
1370 vis_pst_16(res, dp, ~mask);
1371
1372 RESULT_4BC_S16_1PIXEL();
1373 vis_alignaddr((void *)gsrd, 0);
1374 res = vis_faligndata(res, res);
1375 vis_pst_16(res, dp++, mask);
1376 vis_pst_16(res, dp, ~mask);
1377
1378 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4);
1379 RESULT_4BC_S16_1PIXEL();
1380 vis_alignaddr((void *)gsrd, 0);
1381 res = vis_faligndata(res, res);
1382 vis_pst_16(res, dp++, mask);
1383 vis_pst_16(res, dp, ~mask);
/* The loop left i == cols - 4, so i is now cols and the loop below is skipped. */
1384 i += 4;
1385 }
1386
/* Unpipelined path, one pixel per iteration; only runs when cols < 4. */
1387 #pragma pipeloop(0)
1388 for (; i < cols; i++) {
1389 NEXT_PIXEL_4BC_S16();
1390 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4);
1391 RESULT_4BC_S16_1PIXEL();
1392 vis_alignaddr((void *)gsrd, 0);
1393 res = vis_faligndata(res, res);
1394 vis_pst_16(res, dp++, mask);
1395 vis_pst_16(res, dp, ~mask);
1396 }
1397 }
1398
1399 return MLIB_SUCCESS;
1400 }
1401
1402 /***************************************************************/
1403