1 /*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27
28 /*
29 * The functions step along the lines from xLeft to xRight and apply
30 * the bicubic filtering.
31 *
32 */
33
34 #include "vis_proto.h"
35 #include "mlib_ImageAffine.h"
36 #include "mlib_v_ImageFilters.h"
37
38 /***************************************************************/
/* Sample type handled by this file (signed 16-bit). In the visible code it
 * is used for the end-of-line destination pointer of the 2-channel routine. */
39 #define DTYPE mlib_s16
40
/* Not referenced directly in the visible code; presumably consumed by the
 * FILTER_SHIFT / FILTER_MASK definitions that come from the included
 * headers -- TODO confirm. */
41 #define FILTER_BITS 9
42
43 /***************************************************************/
/* Short alias used by every macro below: sPtr is the srcPixelPtr variable
 * (presumably declared by DECLAREVAR_BC() -- confirm). */
44 #define sPtr srcPixelPtr
45
46 /***************************************************************/
/*
 * Point sPtr at the first source sample needed for the current fixed-point
 * position (X, Y), 1-channel layout.
 *
 * The integer parts X >> MLIB_SHIFT and Y >> MLIB_SHIFT are each reduced by
 * one, so sPtr addresses the sample one column before, and in the row whose
 * index is one lower than, that integer position in lineAddr[].
 *
 * Writes xSrc, ySrc and sPtr. Expands to three statements; the caller
 * supplies the final semicolon.
 */
47 #define NEXT_PIXEL_1BC_S16() \
48 xSrc = (X >> MLIB_SHIFT)-1; \
49 ySrc = (Y >> MLIB_SHIFT)-1; \
50 sPtr = (mlib_s16 *)lineAddr[ySrc] + xSrc
51
52 /***************************************************************/
/*
 * Load everything needed to filter one 1-channel pixel, then step (X, Y).
 *
 * Rows: for four consecutive source rows starting at sPtr, the two aligned
 * 8-byte words at and after (sPtr & ~7) are read and combined with the
 * vis_alignaddr / vis_faligndata pair into row0..row3 (8 bytes, i.e. four
 * s16 samples per row). sPtr is advanced by srcYStride between rows (three
 * times in total); since sPtr is an mlib_s16 pointer, srcYStride has to be
 * expressed in s16 elements at this point.
 *
 * Coefficients: yFilter0..yFilter3 are four 8-byte entries read at byte
 * offset filterposy*4 in the second macro argument; xFilter is one 8-byte
 * entry read at byte offset filterposx in the first macro argument.
 * filterposx / filterposy are (X or Y >> FILTER_SHIFT) & FILTER_MASK and
 * are sampled before X and Y are incremented by dX and dY.
 *
 * Parameters (both are macro parameters, despite the global-looking names):
 *   mlib_filters_s16   - table the X coefficients are read from
 *   mlib_filters_s16_4 - table the Y coefficients are read from
 *
 * Side effects: overwrites dpSrc, data0, data1, row0..row3, filterposx,
 * filterposy, yPtr, yFilter0..3, xFilter; advances sPtr, X and Y; calls
 * vis_alignaddr four times, so the alignment state afterwards is the one
 * of the last row. Each row load reads 16 bytes starting at the aligned
 * address at or below sPtr.
 */
53 #define LOAD_BC_S16_1CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) \
54 vis_alignaddr(sPtr, 0); \
55 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
56 data0 = dpSrc[0]; \
57 data1 = dpSrc[1]; \
58 row0 = vis_faligndata(data0, data1); \
59 sPtr += srcYStride; \
60 vis_alignaddr(sPtr, 0); \
61 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
62 data0 = dpSrc[0]; \
63 data1 = dpSrc[1]; \
64 row1 = vis_faligndata(data0, data1); \
65 sPtr += srcYStride; \
66 vis_alignaddr(sPtr, 0); \
67 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
68 data0 = dpSrc[0]; \
69 data1 = dpSrc[1]; \
70 row2 = vis_faligndata(data0, data1); \
71 sPtr += srcYStride; \
72 vis_alignaddr(sPtr, 0); \
73 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
74 data0 = dpSrc[0]; \
75 data1 = dpSrc[1]; \
76 row3 = vis_faligndata(data0, data1); \
77 filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
78 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
79 yFilter0 = yPtr[0]; \
80 yFilter1 = yPtr[1]; \
81 yFilter2 = yPtr[2]; \
82 yFilter3 = yPtr[3]; \
83 filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
84 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \
85 X += dX; \
86 Y += dY
87
88 /***************************************************************/
/*
 * Filter the pixel currently held in row0..row3 / yFilter0..3 / xFilter and
 * leave the packed result in `res`.
 *
 * Steps, in the order written:
 *  - each rowN is multiplied lane-wise by yFilterN using the
 *    vis_fmul8sux16 + vis_fmul8ulx16 pair whose two halves are added with
 *    vis_fpadd16; the four row products are accumulated into `sum`;
 *  - `sum` is multiplied the same way by xFilter (d00 + d10 -> d0);
 *  - the four 16-bit lanes of d0 are folded into one value: the hi and lo
 *    halves are added (vis_fpadd16s), the pair is widened with
 *    vis_fmuld8sux16(f_x01000100, ...), and the two 32-bit halves are added
 *    (vis_fpadd32s) into the low half of d1;
 *  - vis_fpackfix_pair(d1, d1) produces `res`.
 *
 * Only the low half of d1 is written here; its high half keeps whatever it
 * held before. The callers in this file store `res` with vis_st_u16.
 * The pack step presumably depends on the GSR value programmed by the
 * caller (vis_write_gsr) -- confirm against the VIS documentation.
 *
 * Clobbers u0..u3, v0..v3, sum, d00, d10, d0, d1, p0.
 */
89 #define RESULT_1BC_S16_1PIXEL() \
90 u0 = vis_fmul8sux16(row0, yFilter0); \
91 u1 = vis_fmul8ulx16(row0, yFilter0); \
92 u2 = vis_fmul8sux16(row1, yFilter1); \
93 v0 = vis_fpadd16(u0, u1); \
94 u3 = vis_fmul8ulx16(row1, yFilter1); \
95 u0 = vis_fmul8sux16(row2, yFilter2); \
96 v1 = vis_fpadd16(u2, u3); \
97 u1 = vis_fmul8ulx16(row2, yFilter2); \
98 sum = vis_fpadd16(v0, v1); \
99 u2 = vis_fmul8sux16(row3, yFilter3); \
100 v2 = vis_fpadd16(u0, u1); \
101 u3 = vis_fmul8ulx16(row3, yFilter3); \
102 sum = vis_fpadd16(sum, v2); \
103 v3 = vis_fpadd16(u2, u3); \
104 sum = vis_fpadd16(sum, v3); \
105 d00 = vis_fmul8sux16(sum, xFilter); \
106 d10 = vis_fmul8ulx16(sum, xFilter); \
107 d0 = vis_fpadd16(d00, d10); \
108 p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \
109 d0 = vis_fmuld8sux16(f_x01000100, p0); \
110 d1 = vis_write_lo(d1, vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0))); \
111 res = vis_fpackfix_pair(d1, d1)
112
113 /***************************************************************/
/*
 * One stage of the software-pipelined 1-channel loop.
 *
 * Two independent pieces of work are interleaved statement by statement:
 *  (a) the multiply/accumulate part of RESULT_1BC_S16_1PIXEL on the
 *      row0..row3 / yFilter0..3 / xFilter values loaded by the previous
 *      stage, ending in  d0<ind> = d0 + d1  (four 16-bit lanes that are not
 *      yet folded; FADD_1BC_S16 does the folding);
 *  (b) the loads of LOAD_BC_S16_1CH_1PIXEL for the pixel sPtr points at on
 *      entry: refill row0..row3, yFilter0..3 and xFilter, step X and Y,
 *      and finally re-point sPtr (via the new xSrc / ySrc) at the pixel
 *      after that one.
 *
 * Parameters:
 *   ind                - suffix pasted onto d0 to select the output
 *                        variable (0..3 in this file, i.e. d00..d03)
 *   mlib_filters_s16   - table the X coefficients are read from
 *   mlib_filters_s16_4 - table the Y coefficients are read from
 *
 * Ordering constraints visible in the body: every rowN, yFilterN and
 * xFilter is consumed by (a) before (b) overwrites it; filterposx and
 * filterposy are sampled before X and Y are incremented, while xSrc and
 * ySrc are computed after the increment. Do not reorder statements without
 * re-checking those dependencies.
 */
114 #define BC_S16_1CH(ind, mlib_filters_s16, mlib_filters_s16_4) \
115 u0 = vis_fmul8sux16(row0, yFilter0); \
116 u1 = vis_fmul8ulx16(row0, yFilter0); \
117 vis_alignaddr(sPtr, 0); \
118 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
119 u2 = vis_fmul8sux16(row1, yFilter1); \
120 v0 = vis_fpadd16(u0, u1); \
121 data0 = dpSrc[0]; \
122 filterposy = (Y >> FILTER_SHIFT); \
123 u3 = vis_fmul8ulx16(row1, yFilter1); \
124 data1 = dpSrc[1]; \
125 row0 = vis_faligndata(data0, data1); \
126 filterposx = (X >> FILTER_SHIFT); \
127 sPtr += srcYStride; \
128 vis_alignaddr(sPtr, 0); \
129 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
130 u0 = vis_fmul8sux16(row2, yFilter2); \
131 v1 = vis_fpadd16(u2, u3); \
132 data0 = dpSrc[0]; \
133 u1 = vis_fmul8ulx16(row2, yFilter2); \
134 sum = vis_fpadd16(v0, v1); \
135 X += dX; \
136 data1 = dpSrc[1]; \
137 row1 = vis_faligndata(data0, data1); \
138 sPtr += srcYStride; \
139 vis_alignaddr(sPtr, 0); \
140 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
141 u2 = vis_fmul8sux16(row3, yFilter3); \
142 v2 = vis_fpadd16(u0, u1); \
143 Y += dY; \
144 xSrc = (X >> MLIB_SHIFT)-1; \
145 data0 = dpSrc[0]; \
146 u3 = vis_fmul8ulx16(row3, yFilter3); \
147 sum = vis_fpadd16(sum, v2); \
148 ySrc = (Y >> MLIB_SHIFT)-1; \
149 data1 = dpSrc[1]; \
150 filterposy &= FILTER_MASK; \
151 row2 = vis_faligndata(data0, data1); \
152 sPtr += srcYStride; \
153 filterposx &= FILTER_MASK; \
154 vis_alignaddr(sPtr, 0); \
155 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
156 data0 = dpSrc[0]; \
157 v3 = vis_fpadd16(u2, u3); \
158 data1 = dpSrc[1]; \
159 row3 = vis_faligndata(data0, data1); \
160 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
161 yFilter0 = yPtr[0]; \
162 sum = vis_fpadd16(sum, v3); \
163 yFilter1 = yPtr[1]; \
164 d0 = vis_fmul8sux16(sum, xFilter); \
165 yFilter2 = yPtr[2]; \
166 d1 = vis_fmul8ulx16(sum, xFilter); \
167 yFilter3 = yPtr[3]; \
168 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \
169 d0##ind = vis_fpadd16(d0, d1); \
170 sPtr = (mlib_s16 *)lineAddr[ySrc] + xSrc
171
172 /***************************************************************/
/*
 * Fold the four partial results d00..d03 left by BC_S16_1CH(0..3) into one
 * packed 8-byte value `res`.
 *
 * For each d0k: add its hi and lo halves (vis_fpadd16s), widen the pair
 * with vis_fmuld8sux16(f_x01000100, ...), then add the two 32-bit halves
 * (vis_fpadd32s). The four sums are paired as (0,1) into d0 and (2,3) into
 * d1 and packed with vis_fpackfix_pair, so `res` carries four output
 * samples in pixel order.
 *
 * Clobbers p0..p3 and d0..d3; reads d00..d03 and f_x01000100.
 */
173 #define FADD_1BC_S16() \
174 p0 = vis_fpadd16s(vis_read_hi(d00), vis_read_lo(d00)); \
175 p1 = vis_fpadd16s(vis_read_hi(d01), vis_read_lo(d01)); \
176 p2 = vis_fpadd16s(vis_read_hi(d02), vis_read_lo(d02)); \
177 p3 = vis_fpadd16s(vis_read_hi(d03), vis_read_lo(d03)); \
178 d0 = vis_fmuld8sux16(f_x01000100, p0); \
179 d1 = vis_fmuld8sux16(f_x01000100, p1); \
180 d2 = vis_fmuld8sux16(f_x01000100, p2); \
181 d3 = vis_fmuld8sux16(f_x01000100, p3); \
182 d0 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d0), vis_read_lo(d0)), \
183 vis_fpadd32s(vis_read_hi(d1), vis_read_lo(d1))); \
184 d1 = vis_freg_pair(vis_fpadd32s(vis_read_hi(d2), vis_read_lo(d2)), \
185 vis_fpadd32s(vis_read_hi(d3), vis_read_lo(d3))); \
186 res = vis_fpackfix_pair(d0, d1)
187
188 /***************************************************************/
mlib_ImageAffine_s16_1ch_bc(mlib_affine_param * param)189 mlib_status mlib_ImageAffine_s16_1ch_bc (mlib_affine_param *param)
190 {
191 DECLAREVAR_BC();
192 mlib_s32 filterposx, filterposy;
193 mlib_d64 data0, data1;
194 mlib_d64 sum;
195 mlib_d64 row0, row1, row2, row3;
196 mlib_f32 p0, p1, p2, p3;
197 mlib_d64 xFilter, yFilter0, yFilter1, yFilter2, yFilter3;
198 mlib_d64 v0, v1, v2, v3;
199 mlib_d64 u0, u1, u2, u3;
200 mlib_d64 d0, d1, d2, d3;
201 mlib_d64 d00, d10, d01, d02, d03;
202 mlib_d64 *yPtr;
203 mlib_d64 *dpSrc;
204 mlib_s32 align, cols, i;
205 mlib_d64 res;
206 mlib_f32 f_x01000100 = vis_to_float(0x01000100);
207 const mlib_s16 *mlib_filters_table ;
208 const mlib_s16 *mlib_filters_table_4;
209
210 if (filter == MLIB_BICUBIC) {
211 mlib_filters_table = mlib_filters_s16_bc;
212 mlib_filters_table_4 = mlib_filters_s16_bc_4;
213 } else {
214 mlib_filters_table = mlib_filters_s16_bc2;
215 mlib_filters_table_4 = mlib_filters_s16_bc2_4;
216 }
217
218 srcYStride >>= 1;
219
220 for (j = yStart; j <= yFinish; j++) {
221
222 vis_write_gsr(10 << 3);
223
224 CLIP(1);
225
226 cols = xRight - xLeft + 1;
227 align = (8 - ((mlib_addr)dstPixelPtr) & 7) & 7;
228 align >>= 1;
229 align = (cols < align)? cols : align;
230
231 for (i = 0; i < align; i++) {
232 NEXT_PIXEL_1BC_S16();
233 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
234 RESULT_1BC_S16_1PIXEL();
235 vis_st_u16(res, dstPixelPtr++);
236 }
237
238 if (i <= cols - 10) {
239
240 NEXT_PIXEL_1BC_S16();
241 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
242
243 NEXT_PIXEL_1BC_S16();
244
245 BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4);
246 BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4);
247 BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4);
248 BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4);
249
250 FADD_1BC_S16();
251
252 BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4);
253 BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4);
254 BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4);
255 BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4);
256
257 #pragma pipeloop(0)
258 for (; i <= cols - 14; i += 4) {
259 *(mlib_d64*)dstPixelPtr = res;
260 FADD_1BC_S16();
261 BC_S16_1CH(0, mlib_filters_table, mlib_filters_table_4);
262 BC_S16_1CH(1, mlib_filters_table, mlib_filters_table_4);
263 BC_S16_1CH(2, mlib_filters_table, mlib_filters_table_4);
264 BC_S16_1CH(3, mlib_filters_table, mlib_filters_table_4);
265 dstPixelPtr += 4;
266 }
267
268 *(mlib_d64*)dstPixelPtr = res;
269 dstPixelPtr += 4;
270 FADD_1BC_S16();
271 *(mlib_d64*)dstPixelPtr = res;
272 dstPixelPtr += 4;
273
274 RESULT_1BC_S16_1PIXEL();
275 vis_st_u16(res, dstPixelPtr++);
276
277 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
278 RESULT_1BC_S16_1PIXEL();
279 vis_st_u16(res, dstPixelPtr++);
280 i += 10;
281 }
282
283 for (; i < cols; i++) {
284 NEXT_PIXEL_1BC_S16();
285 LOAD_BC_S16_1CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
286 RESULT_1BC_S16_1PIXEL();
287 vis_st_u16(res, dstPixelPtr++);
288 }
289 }
290
291 return MLIB_SUCCESS;
292 }
293
294 /***************************************************************/
/*
 * 2-channel variant of NEXT_PIXEL_1BC_S16: same xSrc / ySrc computation
 * (integer part of X and Y, each minus one), but the column offset into
 * lineAddr[ySrc] is xSrc << 1 because every pixel occupies two s16 samples.
 * Writes xSrc, ySrc and sPtr; the caller supplies the final semicolon.
 */
295 #define NEXT_PIXEL_2BC_S16() \
296 xSrc = (X >> MLIB_SHIFT)-1; \
297 ySrc = (Y >> MLIB_SHIFT)-1; \
298 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 1)
299
300 /***************************************************************/
/*
 * Load everything needed to filter one 2-channel pixel, then step (X, Y).
 *
 * Rows: for four consecutive source rows starting at sPtr, three aligned
 * 8-byte words starting at (sPtr & ~7) are read and combined with
 * vis_alignaddr / vis_faligndata into two registers per row, rowN0 and
 * rowN1 (16 bytes, i.e. eight s16 samples = four 2-channel pixels).
 * sPtr is advanced by srcYStride between rows (three times in total).
 *
 * Coefficients: identical to the 1-channel loader -- yFilter0..3 are four
 * 8-byte entries at byte offset filterposy*4 in the second argument, and
 * xFilter is one 8-byte entry at byte offset filterposx in the first
 * argument. Positions are sampled before X += dX and Y += dY.
 *
 * Parameters (macro parameters, despite the global-looking names):
 *   mlib_filters_s16   - table the X coefficients are read from
 *   mlib_filters_s16_4 - table the Y coefficients are read from
 *
 * Side effects: overwrites dpSrc, data0..data2, row00..row31, filterposx,
 * filterposy, yPtr, yFilter0..3, xFilter; advances sPtr, X and Y; changes
 * the alignment state via vis_alignaddr. Each row load reads 24 bytes
 * starting at the aligned address at or below sPtr.
 */
301 #define LOAD_BC_S16_2CH_1PIXEL(mlib_filters_s16, mlib_filters_s16_4) \
302 vis_alignaddr(sPtr, 0); \
303 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
304 data0 = dpSrc[0]; \
305 data1 = dpSrc[1]; \
306 data2 = dpSrc[2]; \
307 row00 = vis_faligndata(data0, data1); \
308 row01 = vis_faligndata(data1, data2); \
309 sPtr += srcYStride; \
310 vis_alignaddr(sPtr, 0); \
311 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
312 data0 = dpSrc[0]; \
313 data1 = dpSrc[1]; \
314 data2 = dpSrc[2]; \
315 row10 = vis_faligndata(data0, data1); \
316 row11 = vis_faligndata(data1, data2); \
317 sPtr += srcYStride; \
318 vis_alignaddr(sPtr, 0); \
319 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
320 data0 = dpSrc[0]; \
321 data1 = dpSrc[1]; \
322 data2 = dpSrc[2]; \
323 row20 = vis_faligndata(data0, data1); \
324 row21 = vis_faligndata(data1, data2); \
325 sPtr += srcYStride; \
326 vis_alignaddr(sPtr, 0); \
327 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
328 data0 = dpSrc[0]; \
329 data1 = dpSrc[1]; \
330 data2 = dpSrc[2]; \
331 row30 = vis_faligndata(data0, data1); \
332 row31 = vis_faligndata(data1, data2); \
333 filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
334 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
335 yFilter0 = yPtr[0]; \
336 yFilter1 = yPtr[1]; \
337 yFilter2 = yPtr[2]; \
338 yFilter3 = yPtr[3]; \
339 filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
340 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \
341 X += dX; \
342 Y += dY
343
344 /***************************************************************/
/*
 * Filter the 2-channel pixel currently held in row00..row31 /
 * yFilter0..3 / xFilter and leave the packed result in `res`.
 *
 * X coefficients: the chain of vis_fpmerge calls rearranges the bytes of
 * xFilter so that each 16-bit coefficient is duplicated once per channel.
 * Working the byte interleaves through, xFilter0 ends up holding
 * coefficients 0,0,1,1 and xFilter1 coefficients 2,2,3,3 (dr and dr1 are
 * scratch).
 *
 * Y pass: rowN0 and rowN1 are each multiplied by yFilterN with the
 * vis_fmul8sux16 + vis_fmul8ulx16 pair (halves added by vis_fpadd16) and
 * accumulated over the four rows into sum0 (from rowN0) and sum1 (from
 * rowN1).
 *
 * X pass and fold: sum0 * xFilter0 and sum1 * xFilter1 are added into d0,
 * its hi and lo halves are added (vis_fpadd16s) giving one 16-bit value
 * per channel, that pair is widened with vis_fmuld8sux16(f_x01000100, ...)
 * and packed with vis_fpackfix_pair(d0, d0). Because the same register is
 * passed twice, both 32-bit halves of `res` carry the same pixel; the
 * 2-channel routine relies on this when it merges two results with
 * vis_write_hi.
 *
 * Clobbers dr, dr1, xFilter0, xFilter1, u00..u21, v00..v31, sum0, sum1,
 * d00, d10, d20, d30, d0, d1, p0.
 */
345 #define RESULT_2BC_S16_1PIXEL() \
346 u00 = vis_fmul8sux16(row00, yFilter0); \
347 dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); \
348 u01 = vis_fmul8ulx16(row00, yFilter0); \
349 dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); \
350 u10 = vis_fmul8sux16(row01, yFilter0); \
351 dr1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr)); \
352 u11 = vis_fmul8ulx16(row01, yFilter0); \
353 dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); \
354 u20 = vis_fmul8sux16(row10, yFilter1); \
355 v00 = vis_fpadd16(u00, u01); \
356 u21 = vis_fmul8ulx16(row10, yFilter1); \
357 v01 = vis_fpadd16(u10, u11); \
358 u00 = vis_fmul8sux16(row11, yFilter1); \
359 xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); \
360 u01 = vis_fmul8ulx16(row11, yFilter1); \
361 u10 = vis_fmul8sux16(row20, yFilter2); \
362 u11 = vis_fmul8ulx16(row20, yFilter2); \
363 v10 = vis_fpadd16(u20, u21); \
364 sum0 = vis_fpadd16(v00, v10); \
365 u20 = vis_fmul8sux16(row21, yFilter2); \
366 v11 = vis_fpadd16(u00, u01); \
367 u21 = vis_fmul8ulx16(row21, yFilter2); \
368 xFilter1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr1)); \
369 u00 = vis_fmul8sux16(row30, yFilter3); \
370 v20 = vis_fpadd16(u10, u11); \
371 sum1 = vis_fpadd16(v01, v11); \
372 u01 = vis_fmul8ulx16(row30, yFilter3); \
373 sum0 = vis_fpadd16(sum0, v20); \
374 v21 = vis_fpadd16(u20, u21); \
375 u10 = vis_fmul8sux16(row31, yFilter3); \
376 v30 = vis_fpadd16(u00, u01); \
377 sum1 = vis_fpadd16(sum1, v21); \
378 u11 = vis_fmul8ulx16(row31, yFilter3); \
379 sum0 = vis_fpadd16(sum0, v30); \
380 v31 = vis_fpadd16(u10, u11); \
381 sum1 = vis_fpadd16(sum1, v31); \
382 d00 = vis_fmul8sux16(sum0, xFilter0); \
383 d10 = vis_fmul8ulx16(sum0, xFilter0); \
384 d20 = vis_fmul8sux16(sum1, xFilter1); \
385 d30 = vis_fmul8ulx16(sum1, xFilter1); \
386 d0 = vis_fpadd16(d00, d10); \
387 d1 = vis_fpadd16(d20, d30); \
388 d0 = vis_fpadd16(d0, d1); \
389 p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \
390 d0 = vis_fmuld8sux16(f_x01000100, p0); \
391 res = vis_fpackfix_pair(d0, d0)
392
393 /***************************************************************/
/*
 * One stage of the software-pipelined 2-channel loop.
 *
 * Two pieces of work are interleaved statement by statement:
 *  (a) the xFilter expansion and multiply/accumulate part of
 *      RESULT_2BC_S16_1PIXEL on the rows and coefficients loaded by the
 *      previous stage, ending in two unfolded partial results
 *          d0<ind> = d0 + d1   (products of sum0 and xFilter0)
 *          d1<ind> = d2 + d3   (products of sum1 and xFilter1)
 *      which FADD_2BC_S16 later combines;
 *  (b) the loads of LOAD_BC_S16_2CH_1PIXEL for the pixel sPtr points at on
 *      entry: refill row00..row31, yFilter0..3 and xFilter, step X and Y,
 *      and finally re-point sPtr (via the new xSrc / ySrc) at the pixel
 *      after that one.
 *
 * Parameters:
 *   ind                - suffix pasted onto d0 / d1 to select the output
 *                        variables (0 or 1 in this file: d00,d10 / d01,d11)
 *   mlib_filters_s16   - table the X coefficients are read from
 *   mlib_filters_s16_4 - table the Y coefficients are read from
 *
 * The statement order is load-bearing: old rows and coefficients are
 * consumed before their replacements are written, filterposx / filterposy
 * are sampled before X and Y are incremented, and xSrc / ySrc are computed
 * after the increment. Do not reorder without re-checking those
 * dependencies.
 */
394 #define BC_S16_2CH(ind, mlib_filters_s16, mlib_filters_s16_4) \
395 u00 = vis_fmul8sux16(row00, yFilter0); \
396 dr = vis_fpmerge(vis_read_hi(xFilter), vis_read_lo(xFilter)); \
397 u01 = vis_fmul8ulx16(row00, yFilter0); \
398 dr = vis_fpmerge(vis_read_hi(dr), vis_read_lo(dr)); \
399 u10 = vis_fmul8sux16(row01, yFilter0); \
400 dr1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr)); \
401 u11 = vis_fmul8ulx16(row01, yFilter0); \
402 dr = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr)); \
403 vis_alignaddr(sPtr, 0); \
404 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
405 u20 = vis_fmul8sux16(row10, yFilter1); \
406 v00 = vis_fpadd16(u00, u01); \
407 u21 = vis_fmul8ulx16(row10, yFilter1); \
408 data0 = dpSrc[0]; \
409 filterposy = (Y >> FILTER_SHIFT); \
410 v01 = vis_fpadd16(u10, u11); \
411 data1 = dpSrc[1]; \
412 u00 = vis_fmul8sux16(row11, yFilter1); \
413 xFilter0 = vis_fpmerge(vis_read_hi(dr), vis_read_hi(dr1)); \
414 data2 = dpSrc[2]; \
415 u01 = vis_fmul8ulx16(row11, yFilter1); \
416 row00 = vis_faligndata(data0, data1); \
417 u10 = vis_fmul8sux16(row20, yFilter2); \
418 row01 = vis_faligndata(data1, data2); \
419 filterposx = (X >> FILTER_SHIFT); \
420 sPtr += srcYStride; \
421 vis_alignaddr(sPtr, 0); \
422 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
423 u11 = vis_fmul8ulx16(row20, yFilter2); \
424 v10 = vis_fpadd16(u20, u21); \
425 data0 = dpSrc[0]; \
426 sum0 = vis_fpadd16(v00, v10); \
427 X += dX; \
428 data1 = dpSrc[1]; \
429 u20 = vis_fmul8sux16(row21, yFilter2); \
430 v11 = vis_fpadd16(u00, u01); \
431 data2 = dpSrc[2]; \
432 row10 = vis_faligndata(data0, data1); \
433 u21 = vis_fmul8ulx16(row21, yFilter2); \
434 row11 = vis_faligndata(data1, data2); \
435 sPtr += srcYStride; \
436 xFilter1 = vis_fpmerge(vis_read_lo(dr), vis_read_lo(dr1)); \
437 vis_alignaddr(sPtr, 0); \
438 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
439 u00 = vis_fmul8sux16(row30, yFilter3); \
440 v20 = vis_fpadd16(u10, u11); \
441 Y += dY; \
442 xSrc = (X >> MLIB_SHIFT)-1; \
443 sum1 = vis_fpadd16(v01, v11); \
444 data0 = dpSrc[0]; \
445 u01 = vis_fmul8ulx16(row30, yFilter3); \
446 sum0 = vis_fpadd16(sum0, v20); \
447 ySrc = (Y >> MLIB_SHIFT)-1; \
448 data1 = dpSrc[1]; \
449 v21 = vis_fpadd16(u20, u21); \
450 u10 = vis_fmul8sux16(row31, yFilter3); \
451 data2 = dpSrc[2]; \
452 v30 = vis_fpadd16(u00, u01); \
453 filterposy &= FILTER_MASK; \
454 row20 = vis_faligndata(data0, data1); \
455 sum1 = vis_fpadd16(sum1, v21); \
456 u11 = vis_fmul8ulx16(row31, yFilter3); \
457 row21 = vis_faligndata(data1, data2); \
458 sPtr += srcYStride; \
459 filterposx &= FILTER_MASK; \
460 v31 = vis_fpadd16(u10, u11); \
461 vis_alignaddr(sPtr, 0); \
462 dpSrc = (mlib_d64*)(((mlib_addr)sPtr) & (~7)); \
463 data0 = dpSrc[0]; \
464 sum0 = vis_fpadd16(sum0, v30); \
465 data1 = dpSrc[1]; \
466 sum1 = vis_fpadd16(sum1, v31); \
467 data2 = dpSrc[2]; \
468 row30 = vis_faligndata(data0, data1); \
469 d0 = vis_fmul8sux16(sum0, xFilter0); \
470 row31 = vis_faligndata(data1, data2); \
471 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
472 d1 = vis_fmul8ulx16(sum0, xFilter0); \
473 yFilter0 = yPtr[0]; \
474 d2 = vis_fmul8sux16(sum1, xFilter1); \
475 yFilter1 = yPtr[1]; \
476 d3 = vis_fmul8ulx16(sum1, xFilter1); \
477 d0##ind = vis_fpadd16(d0, d1); \
478 yFilter2 = yPtr[2]; \
479 yFilter3 = yPtr[3]; \
480 d1##ind = vis_fpadd16(d2, d3); \
481 xFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_s16 + filterposx)); \
482 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 1)
483
484 /***************************************************************/
/*
 * Fold the partial results of two pipelined 2-channel pixels into `res`.
 *
 * d00 + d10 (left by BC_S16_2CH(0, ...)) and d01 + d11 (left by
 * BC_S16_2CH(1, ...)) are each reduced by adding hi and lo halves
 * (vis_fpadd16s), widened with vis_fmuld8sux16(f_x01000100, ...), and the
 * two results are packed together by vis_fpackfix_pair, so `res` carries
 * two 2-channel pixels in order.
 *
 * Clobbers d0, d1, d2, p0, p1.
 */
485 #define FADD_2BC_S16() \
486 d0 = vis_fpadd16(d00, d10); \
487 d2 = vis_fpadd16(d01, d11); \
488 p0 = vis_fpadd16s(vis_read_hi(d0), vis_read_lo(d0)); \
489 p1 = vis_fpadd16s(vis_read_hi(d2), vis_read_lo(d2)); \
490 d0 = vis_fmuld8sux16(f_x01000100, p0); \
491 d1 = vis_fmuld8sux16(f_x01000100, p1); \
492 res = vis_fpackfix_pair(d0, d1)
493
494 /***************************************************************/
mlib_ImageAffine_s16_2ch_bc(mlib_affine_param * param)495 mlib_status mlib_ImageAffine_s16_2ch_bc (mlib_affine_param *param)
496 {
497 DECLAREVAR_BC();
498 DTYPE *dstLineEnd;
499 mlib_s32 filterposx, filterposy;
500 mlib_d64 data0, data1, data2;
501 mlib_d64 sum0, sum1;
502 mlib_d64 row00, row10, row20, row30;
503 mlib_d64 row01, row11, row21, row31;
504 mlib_f32 p0, p1;
505 mlib_d64 xFilter, xFilter0, xFilter1;
506 mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
507 mlib_d64 v00, v01, v10, v11, v20, v21, v30, v31;
508 mlib_d64 u00, u01, u10, u11, u20, u21;
509 mlib_d64 d0, d1, d2, d3;
510 mlib_d64 d00, d10, d20, d30, d01, d11;
511 mlib_d64 *yPtr;
512 mlib_d64 *dp, *dpSrc;
513 mlib_s32 cols, i, mask, emask;
514 mlib_d64 res, res1;
515 mlib_d64 dr, dr1;
516 mlib_f32 f_x01000100 = vis_to_float(0x01000100);
517 const mlib_s16 *mlib_filters_table ;
518 const mlib_s16 *mlib_filters_table_4;
519
520 if (filter == MLIB_BICUBIC) {
521 mlib_filters_table = mlib_filters_s16_bc;
522 mlib_filters_table_4 = mlib_filters_s16_bc_4;
523 } else {
524 mlib_filters_table = mlib_filters_s16_bc2;
525 mlib_filters_table_4 = mlib_filters_s16_bc2_4;
526 }
527
528 srcYStride >>= 1;
529
530 for (j = yStart; j <= yFinish; j++) {
531
532 vis_write_gsr(10 << 3);
533
534 CLIP(2);
535 dstLineEnd = (DTYPE*)dstData + 2 * xRight;
536
537 cols = xRight - xLeft + 1;
538 dp = vis_alignaddr(dstPixelPtr, 0);
539 dstLineEnd += 1;
540 mask = vis_edge16(dstPixelPtr, dstLineEnd);
541 i = 0;
542
543 if (i <= cols - 6) {
544
545 NEXT_PIXEL_2BC_S16();
546 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
547
548 NEXT_PIXEL_2BC_S16();
549
550 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
551 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);
552
553 FADD_2BC_S16();
554
555 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
556 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);
557
558 #pragma pipeloop(0)
559 for (; i <= cols-8; i += 2) {
560 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
561 res = vis_faligndata(res, res);
562 vis_pst_16(res, dp++, mask);
563 vis_pst_16(res, dp, ~mask);
564 FADD_2BC_S16();
565 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
566 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);
567 }
568
569 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
570 res = vis_faligndata(res, res);
571 vis_pst_16(res, dp++, mask);
572 vis_pst_16(res, dp, ~mask);
573
574 FADD_2BC_S16();
575 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
576 res = vis_faligndata(res, res);
577 vis_pst_16(res, dp++, mask);
578 vis_pst_16(res, dp, ~mask);
579
580 RESULT_2BC_S16_1PIXEL();
581 res1 = res;
582
583 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
584 RESULT_2BC_S16_1PIXEL();
585 res = vis_write_hi(res, vis_read_hi(res1));
586 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
587 res = vis_faligndata(res, res);
588 vis_pst_16(res, dp++, mask);
589 vis_pst_16(res, dp, ~mask);
590
591 i += 6;
592 }
593
594 if (i <= cols - 4) {
595 NEXT_PIXEL_2BC_S16();
596 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
597
598 NEXT_PIXEL_2BC_S16();
599
600 BC_S16_2CH(0, mlib_filters_table, mlib_filters_table_4);
601 BC_S16_2CH(1, mlib_filters_table, mlib_filters_table_4);
602
603 FADD_2BC_S16();
604 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
605 res = vis_faligndata(res, res);
606 vis_pst_16(res, dp++, mask);
607 vis_pst_16(res, dp, ~mask);
608
609 RESULT_2BC_S16_1PIXEL();
610 res1 = res;
611
612 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
613 RESULT_2BC_S16_1PIXEL();
614 res = vis_write_hi(res, vis_read_hi(res1));
615 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
616 res = vis_faligndata(res, res);
617 vis_pst_16(res, dp++, mask);
618 vis_pst_16(res, dp, ~mask);
619
620 i += 4;
621 }
622
623 if (i <= cols - 2) {
624 NEXT_PIXEL_2BC_S16();
625 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
626 RESULT_2BC_S16_1PIXEL();
627 res1 = res;
628
629 NEXT_PIXEL_2BC_S16();
630 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
631 RESULT_2BC_S16_1PIXEL();
632 res = vis_write_hi(res, vis_read_hi(res1));
633 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
634 res = vis_faligndata(res, res);
635 vis_pst_16(res, dp++, mask);
636 vis_pst_16(res, dp, ~mask);
637
638 i += 2;
639 }
640
641 if (i < cols) {
642 NEXT_PIXEL_2BC_S16();
643 LOAD_BC_S16_2CH_1PIXEL(mlib_filters_table, mlib_filters_table_4);
644 RESULT_2BC_S16_1PIXEL();
645 vis_alignaddr((void *)(8 - (mlib_addr)dstPixelPtr), 0);
646 res = vis_faligndata(res, res);
647 emask = vis_edge16(dp, dstLineEnd);
648 vis_pst_16(res, dp++, mask & emask);
649
650 if ((mlib_s16*)dp <= dstLineEnd) {
651 mask = vis_edge16(dp, dstLineEnd);
652 vis_pst_16(res, dp, mask);
653 }
654 }
655 }
656
657 return MLIB_SUCCESS;
658 }
659
660 /***************************************************************/
/*
 * 3-channel variant of NEXT_PIXEL_1BC_S16: same xSrc / ySrc computation
 * (integer part of X and Y, each minus one), but the column offset into
 * lineAddr[ySrc] is xSrc*3 because every pixel occupies three s16 samples.
 * Writes xSrc, ySrc and sPtr; the caller supplies the final semicolon.
 */
661 #define NEXT_PIXEL_3BC_S16() \
662 xSrc = (X >> MLIB_SHIFT)-1; \
663 ySrc = (Y >> MLIB_SHIFT)-1; \
664 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc*3)
665
666 /***************************************************************/
/*
 * Load everything needed to filter one 3-channel pixel, then step (X, Y).
 *
 * Rows: for four consecutive source rows starting at sPtr, four 8-byte
 * words are read from the pointer returned by vis_alignaddr(sPtr, 0) and
 * combined with vis_faligndata into three registers per row, rowN0..rowN2
 * (24 bytes, i.e. twelve s16 samples = four 3-channel pixels). sPtr is
 * advanced by srcYStride between rows (three times in total).
 *
 * Coefficients: yFilter0..3 are four 8-byte entries at byte offset
 * filterposy*4 in the second argument; xFilter0..2 are three 8-byte
 * entries at byte offset filterposx*3 in the first argument. Positions are
 * (X or Y >> FILTER_SHIFT) & FILTER_MASK, sampled before X += dX and
 * Y += dY.
 *
 * Parameters (macro parameters, despite the global-looking names):
 *   mlib_filters_s16_3 - table the X coefficients are read from
 *   mlib_filters_s16_4 - table the Y coefficients are read from
 *
 * Side effects: overwrites dpSrc, data0..data3, row00..row32, filterposx,
 * filterposy, yPtr, xPtr, yFilter0..3, xFilter0..2; advances sPtr, X and
 * Y; changes the alignment state via vis_alignaddr. Each row load reads
 * 32 bytes starting at dpSrc.
 */
667 #define LOAD_BC_S16_3CH_1PIXEL(mlib_filters_s16_3, mlib_filters_s16_4) \
668 dpSrc = vis_alignaddr(sPtr, 0); \
669 data0 = dpSrc[0]; \
670 data1 = dpSrc[1]; \
671 data2 = dpSrc[2]; \
672 data3 = dpSrc[3]; \
673 row00 = vis_faligndata(data0, data1); \
674 row01 = vis_faligndata(data1, data2); \
675 row02 = vis_faligndata(data2, data3); \
676 sPtr += srcYStride; \
677 dpSrc = vis_alignaddr(sPtr, 0); \
678 data0 = dpSrc[0]; \
679 data1 = dpSrc[1]; \
680 data2 = dpSrc[2]; \
681 data3 = dpSrc[3]; \
682 row10 = vis_faligndata(data0, data1); \
683 row11 = vis_faligndata(data1, data2); \
684 row12 = vis_faligndata(data2, data3); \
685 sPtr += srcYStride; \
686 dpSrc = vis_alignaddr(sPtr, 0); \
687 data0 = dpSrc[0]; \
688 data1 = dpSrc[1]; \
689 data2 = dpSrc[2]; \
690 data3 = dpSrc[3]; \
691 row20 = vis_faligndata(data0, data1); \
692 row21 = vis_faligndata(data1, data2); \
693 row22 = vis_faligndata(data2, data3); \
694 sPtr += srcYStride; \
695 dpSrc = vis_alignaddr(sPtr, 0); \
696 data0 = dpSrc[0]; \
697 data1 = dpSrc[1]; \
698 data2 = dpSrc[2]; \
699 data3 = dpSrc[3]; \
700 row30 = vis_faligndata(data0, data1); \
701 row31 = vis_faligndata(data1, data2); \
702 row32 = vis_faligndata(data2, data3); \
703 filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
704 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
705 yFilter0 = yPtr[0]; \
706 yFilter1 = yPtr[1]; \
707 yFilter2 = yPtr[2]; \
708 yFilter3 = yPtr[3]; \
709 filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
710 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3)); \
711 xFilter0 = xPtr[0]; \
712 xFilter1 = xPtr[1]; \
713 xFilter2 = xPtr[2]; \
714 X += dX; \
715 Y += dY
716
717 /***************************************************************/
/*
 * Write one 3-channel pixel: copy the first three s16 elements of f0.t to
 * dstPixelPtr[0..2] and advance dstPixelPtr by three samples. f0 is the
 * s16[4] / d64 union filled by RESULT_3BC_S16_1PIXEL or FADD_3BC_S16; its
 * fourth element is not stored. The caller supplies the final semicolon.
 */
718 #define STORE_BC_S16_3CH_1PIXEL() \
719 dstPixelPtr[0] = f0.t[0]; \
720 dstPixelPtr[1] = f0.t[1]; \
721 dstPixelPtr[2] = f0.t[2]; \
722 dstPixelPtr += 3
723
724 /***************************************************************/
/*
 * Filter the 3-channel pixel currently held in row00..row32 /
 * yFilter0..3 / xFilter0..2 and leave the packed result in f0.d.
 *
 * Y pass: rowN0, rowN1 and rowN2 are each multiplied by yFilterN with the
 * vis_fmul8sux16 + vis_fmul8ulx16 pair (halves added by vis_fpadd16) and
 * accumulated over the four rows into sum0, sum1 and sum2.
 *
 * X pass: sumK is multiplied the same way by xFilterK, giving d0, d1, d2 --
 * twelve 16-bit lanes p[0..11].
 *
 * Fold: with the alignment set to 6 and then 2 bytes, vis_faligndata builds
 * shifted views (d3 from d0|d1, d4 from d1|d2, then a rotated copy of d2)
 * that are added together. Worked through from those shifts, lane c of d0
 * (c = 0..2) ends up as p[c] + p[3+c] + p[6+c] + p[9+c]; the fourth lane is
 * a by-product that STORE_BC_S16_3CH_1PIXEL never reads. The lanes are
 * then widened with vis_fmuld8sux16(f_x01000100, ...) and packed with
 * vis_fpackfix_pair into f0.d.
 *
 * NOTE(review): the sequence re-derives row30 from data0 / data1 part-way
 * through, after row30's last use in this macro. The visible code gives no
 * reason for it; it looks like a leftover -- confirm before removing.
 *
 * Side effects: clobbers u00..u21, v00..v32, sum0..sum2, d0..d4, row30 and
 * f0; leaves the alignment state at 2 bytes.
 */
725 #define RESULT_3BC_S16_1PIXEL() \
726 u00 = vis_fmul8sux16(row00, yFilter0); \
727 u01 = vis_fmul8ulx16(row00, yFilter0); \
728 u10 = vis_fmul8sux16(row01, yFilter0); \
729 u11 = vis_fmul8ulx16(row01, yFilter0); \
730 v00 = vis_fpadd16(u00, u01); \
731 u20 = vis_fmul8sux16(row02, yFilter0); \
732 v01 = vis_fpadd16(u10, u11); \
733 u21 = vis_fmul8ulx16(row02, yFilter0); \
734 u00 = vis_fmul8sux16(row10, yFilter1); \
735 u01 = vis_fmul8ulx16(row10, yFilter1); \
736 v02 = vis_fpadd16(u20, u21); \
737 u10 = vis_fmul8sux16(row11, yFilter1); \
738 u11 = vis_fmul8ulx16(row11, yFilter1); \
739 v10 = vis_fpadd16(u00, u01); \
740 u20 = vis_fmul8sux16(row12, yFilter1); \
741 u21 = vis_fmul8ulx16(row12, yFilter1); \
742 u00 = vis_fmul8sux16(row20, yFilter2); \
743 v11 = vis_fpadd16(u10, u11); \
744 u01 = vis_fmul8ulx16(row20, yFilter2); \
745 v12 = vis_fpadd16(u20, u21); \
746 u10 = vis_fmul8sux16(row21, yFilter2); \
747 u11 = vis_fmul8ulx16(row21, yFilter2); \
748 v20 = vis_fpadd16(u00, u01); \
749 u20 = vis_fmul8sux16(row22, yFilter2); \
750 sum0 = vis_fpadd16(v00, v10); \
751 u21 = vis_fmul8ulx16(row22, yFilter2); \
752 u00 = vis_fmul8sux16(row30, yFilter3); \
753 u01 = vis_fmul8ulx16(row30, yFilter3); \
754 v21 = vis_fpadd16(u10, u11); \
755 sum1 = vis_fpadd16(v01, v11); \
756 u10 = vis_fmul8sux16(row31, yFilter3); \
757 sum2 = vis_fpadd16(v02, v12); \
758 v22 = vis_fpadd16(u20, u21); \
759 u11 = vis_fmul8ulx16(row31, yFilter3); \
760 sum0 = vis_fpadd16(sum0, v20); \
761 u20 = vis_fmul8sux16(row32, yFilter3); \
762 v30 = vis_fpadd16(u00, u01); \
763 sum1 = vis_fpadd16(sum1, v21); \
764 u21 = vis_fmul8ulx16(row32, yFilter3); \
765 v31 = vis_fpadd16(u10, u11); \
766 sum2 = vis_fpadd16(sum2, v22); \
767 v32 = vis_fpadd16(u20, u21); \
768 sum0 = vis_fpadd16(sum0, v30); \
769 row30 = vis_faligndata(data0, data1); \
770 v00 = vis_fmul8sux16(sum0, xFilter0); \
771 sum1 = vis_fpadd16(sum1, v31); \
772 sum2 = vis_fpadd16(sum2, v32); \
773 v01 = vis_fmul8ulx16(sum0, xFilter0); \
774 v10 = vis_fmul8sux16(sum1, xFilter1); \
775 v11 = vis_fmul8ulx16(sum1, xFilter1); \
776 d0 = vis_fpadd16(v00, v01); \
777 v20 = vis_fmul8sux16(sum2, xFilter2); \
778 v21 = vis_fmul8ulx16(sum2, xFilter2); \
779 d1 = vis_fpadd16(v10, v11); \
780 d2 = vis_fpadd16(v20, v21); \
781 vis_alignaddr((void*)6, 0); \
782 d3 = vis_faligndata(d0, d1); \
783 vis_alignaddr((void*)2, 0); \
784 d4 = vis_faligndata(d1, d2); \
785 d0 = vis_fpadd16(d0, d3); \
786 d2 = vis_fpadd16(d2, d4); \
787 d1 = vis_faligndata(d2, d2); \
788 d0 = vis_fpadd16(d0, d1); \
789 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
790 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
791 f0.d = vis_fpackfix_pair(d2, d3)
792
793 /***************************************************************/
/*
 * One stage of the software-pipelined 3-channel loop.
 *
 * Two pieces of work are interleaved statement by statement:
 *  (a) the Y and X multiply/accumulate passes of RESULT_3BC_S16_1PIXEL on
 *      the rows and coefficients loaded by the previous stage, ending in
 *      the three unfolded registers d0, d1, d2 (FADD_3BC_S16 performs the
 *      fold and pack);
 *  (b) the loads of LOAD_BC_S16_3CH_1PIXEL for the pixel sPtr points at on
 *      entry: refill row00..row32, yFilter0..3 and xFilter0..2, step X and
 *      Y, and finally re-point sPtr (via the new xSrc / ySrc) at the pixel
 *      after that one.
 *
 * Parameters (macro parameters, despite the global-looking names):
 *   mlib_filters_s16_3 - table the X coefficients are read from
 *   mlib_filters_s16_4 - table the Y coefficients are read from
 *
 * Unlike the 1- and 2-channel stages there is no `ind` suffix: the outputs
 * are always d0, d1, d2, so FADD_3BC_S16 has to run before the next stage.
 *
 * The statement order is load-bearing: old rows and coefficients are
 * consumed before their replacements are written, filterposx / filterposy
 * are sampled before X and Y are incremented, and xSrc / ySrc are computed
 * after the increment. The macro also leaves the alignment state set by
 * its last vis_alignaddr(sPtr, 0). Do not reorder without re-checking
 * those dependencies.
 */
794 #define BC_S16_3CH(mlib_filters_s16_3, mlib_filters_s16_4) \
795 u00 = vis_fmul8sux16(row00, yFilter0); \
796 u01 = vis_fmul8ulx16(row00, yFilter0); \
797 u10 = vis_fmul8sux16(row01, yFilter0); \
798 u11 = vis_fmul8ulx16(row01, yFilter0); \
799 v00 = vis_fpadd16(u00, u01); \
800 u20 = vis_fmul8sux16(row02, yFilter0); \
801 v01 = vis_fpadd16(u10, u11); \
802 u21 = vis_fmul8ulx16(row02, yFilter0); \
803 dpSrc = vis_alignaddr(sPtr, 0); \
804 u00 = vis_fmul8sux16(row10, yFilter1); \
805 u01 = vis_fmul8ulx16(row10, yFilter1); \
806 data0 = dpSrc[0]; \
807 filterposy = (Y >> FILTER_SHIFT); \
808 v02 = vis_fpadd16(u20, u21); \
809 data1 = dpSrc[1]; \
810 u10 = vis_fmul8sux16(row11, yFilter1); \
811 data2 = dpSrc[2]; \
812 u11 = vis_fmul8ulx16(row11, yFilter1); \
813 v10 = vis_fpadd16(u00, u01); \
814 data3 = dpSrc[3]; \
815 u20 = vis_fmul8sux16(row12, yFilter1); \
816 row00 = vis_faligndata(data0, data1); \
817 u21 = vis_fmul8ulx16(row12, yFilter1); \
818 row01 = vis_faligndata(data1, data2); \
819 u00 = vis_fmul8sux16(row20, yFilter2); \
820 row02 = vis_faligndata(data2, data3); \
821 filterposx = (X >> FILTER_SHIFT); \
822 sPtr += srcYStride; \
823 dpSrc = vis_alignaddr(sPtr, 0); \
824 v11 = vis_fpadd16(u10, u11); \
825 u01 = vis_fmul8ulx16(row20, yFilter2); \
826 v12 = vis_fpadd16(u20, u21); \
827 data0 = dpSrc[0]; \
828 u10 = vis_fmul8sux16(row21, yFilter2); \
829 X += dX; \
830 data1 = dpSrc[1]; \
831 u11 = vis_fmul8ulx16(row21, yFilter2); \
832 v20 = vis_fpadd16(u00, u01); \
833 data2 = dpSrc[2]; \
834 u20 = vis_fmul8sux16(row22, yFilter2); \
835 sum0 = vis_fpadd16(v00, v10); \
836 data3 = dpSrc[3]; \
837 row10 = vis_faligndata(data0, data1); \
838 u21 = vis_fmul8ulx16(row22, yFilter2); \
839 row11 = vis_faligndata(data1, data2); \
840 u00 = vis_fmul8sux16(row30, yFilter3); \
841 row12 = vis_faligndata(data2, data3); \
842 sPtr += srcYStride; \
843 dpSrc = vis_alignaddr(sPtr, 0); \
844 u01 = vis_fmul8ulx16(row30, yFilter3); \
845 v21 = vis_fpadd16(u10, u11); \
846 Y += dY; \
847 xSrc = (X >> MLIB_SHIFT)-1; \
848 sum1 = vis_fpadd16(v01, v11); \
849 data0 = dpSrc[0]; \
850 u10 = vis_fmul8sux16(row31, yFilter3); \
851 sum2 = vis_fpadd16(v02, v12); \
852 ySrc = (Y >> MLIB_SHIFT)-1; \
853 data1 = dpSrc[1]; \
854 v22 = vis_fpadd16(u20, u21); \
855 u11 = vis_fmul8ulx16(row31, yFilter3); \
856 data2 = dpSrc[2]; \
857 sum0 = vis_fpadd16(sum0, v20); \
858 u20 = vis_fmul8sux16(row32, yFilter3); \
859 data3 = dpSrc[3]; \
860 v30 = vis_fpadd16(u00, u01); \
861 filterposy &= FILTER_MASK; \
862 row20 = vis_faligndata(data0, data1); \
863 sum1 = vis_fpadd16(sum1, v21); \
864 u21 = vis_fmul8ulx16(row32, yFilter3); \
865 row21 = vis_faligndata(data1, data2); \
866 row22 = vis_faligndata(data2, data3); \
867 sPtr += srcYStride; \
868 filterposx &= FILTER_MASK; \
869 v31 = vis_fpadd16(u10, u11); \
870 dpSrc = vis_alignaddr(sPtr, 0); \
871 data0 = dpSrc[0]; \
872 sum2 = vis_fpadd16(sum2, v22); \
873 data1 = dpSrc[1]; \
874 v32 = vis_fpadd16(u20, u21); \
875 data2 = dpSrc[2]; \
876 sum0 = vis_fpadd16(sum0, v30); \
877 data3 = dpSrc[3]; \
878 row30 = vis_faligndata(data0, data1); \
879 v00 = vis_fmul8sux16(sum0, xFilter0); \
880 row31 = vis_faligndata(data1, data2); \
881 row32 = vis_faligndata(data2, data3); \
882 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
883 sum1 = vis_fpadd16(sum1, v31); \
884 yFilter0 = yPtr[0]; \
885 sum2 = vis_fpadd16(sum2, v32); \
886 v01 = vis_fmul8ulx16(sum0, xFilter0); \
887 yFilter1 = yPtr[1]; \
888 v10 = vis_fmul8sux16(sum1, xFilter1); \
889 yFilter2 = yPtr[2]; \
890 v11 = vis_fmul8ulx16(sum1, xFilter1); \
891 d0 = vis_fpadd16(v00, v01); \
892 yFilter3 = yPtr[3]; \
893 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_3 + filterposx*3)); \
894 v20 = vis_fmul8sux16(sum2, xFilter2); \
895 xFilter0 = xPtr[0]; \
896 v21 = vis_fmul8ulx16(sum2, xFilter2); \
897 d1 = vis_fpadd16(v10, v11); \
898 xFilter1 = xPtr[1]; \
899 d2 = vis_fpadd16(v20, v21); \
900 xFilter2 = xPtr[2]; \
901 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc*3)
902
903 /***************************************************************/
/*
 * FADD_3BC_S16 - final reduction for one 3-channel pixel.
 *
 * Collapses the three x-filtered partial sums d0, d1, d2 (callers run
 * BC_S16_3CH() right before this macro) into one packed value that is
 * left in f0.d:
 *   - vis_alignaddr((void*)6, 0) and vis_alignaddr((void*)2, 0) are
 *     issued with constant "addresses" 6 and 2; their results are
 *     discarded, so they serve only to program the byte offset consumed
 *     by the vis_faligndata() calls that follow.
 *   - d3 = faligndata(d0, d1) at offset 6 and d4 = faligndata(d1, d2) at
 *     offset 2 are added onto d0 and d2 with vis_fpadd16().
 *   - vis_faligndata(d2, d2) runs with the offset still at 2 and its
 *     result is added onto d0.
 *   - both halves of d0 go through vis_fmuld8sux16() with f_x01000100
 *     and are packed by vis_fpackfix_pair().
 *
 * Reads:  d0, d1, d2, f_x01000100.
 * Writes: d0, d1, d2, d3, d4, f0.d and the alignment offset.
 *
 * NOTE(review): which lanes of f0.d hold the three channels is decided by
 * the store macro defined elsewhere in this file - confirm there.
 * NOTE(review): vis_fpackfix_pair() presumably scales by the GSR value
 * the callers program with vis_write_gsr(10 << 3) - confirm against the
 * VIS documentation.
 *
 * Expands to a statement sequence without a trailing semicolon; the
 * caller supplies the final ';'.
 */
904 #define FADD_3BC_S16() \
905 vis_alignaddr((void*)6, 0); \
906 d3 = vis_faligndata(d0, d1); \
907 vis_alignaddr((void*)2, 0); \
908 d4 = vis_faligndata(d1, d2); \
909 d0 = vis_fpadd16(d0, d3); \
910 d2 = vis_fpadd16(d2, d4); \
911 d1 = vis_faligndata(d2, d2); \
912 d0 = vis_fpadd16(d0, d1); \
913 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
914 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
915 f0.d = vis_fpackfix_pair(d2, d3)
916
917 /***************************************************************/
mlib_ImageAffine_s16_3ch_bc(mlib_affine_param * param)918 mlib_status mlib_ImageAffine_s16_3ch_bc (mlib_affine_param *param)
919 {
920 DECLAREVAR_BC();
921 mlib_s32 filterposx, filterposy;
922 mlib_d64 data0, data1, data2, data3;
923 mlib_d64 sum0, sum1, sum2;
924 mlib_d64 row00, row10, row20, row30;
925 mlib_d64 row01, row11, row21, row31;
926 mlib_d64 row02, row12, row22, row32;
927 mlib_d64 xFilter0, xFilter1, xFilter2;
928 mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
929 mlib_d64 v00, v01, v02, v10, v11, v12, v20, v21, v22, v30, v31, v32;
930 mlib_d64 u00, u01, u10, u11, u20, u21;
931 mlib_d64 d0, d1, d2, d3, d4;
932 mlib_d64 *yPtr, *xPtr;
933 mlib_d64 *dpSrc;
934 mlib_s32 cols, i;
935 mlib_f32 f_x01000100 = vis_to_float(0x01000100);
936 union {
937 mlib_s16 t[4];
938 mlib_d64 d;
939 } f0;
940 const mlib_s16 *mlib_filters_table_3;
941 const mlib_s16 *mlib_filters_table_4;
942
943 if (filter == MLIB_BICUBIC) {
944 mlib_filters_table_3 = mlib_filters_s16_bc_3;
945 mlib_filters_table_4 = mlib_filters_s16_bc_4;
946 } else {
947 mlib_filters_table_3 = mlib_filters_s16_bc2_3;
948 mlib_filters_table_4 = mlib_filters_s16_bc2_4;
949 }
950
951 srcYStride >>= 1;
952
953 for (j = yStart; j <= yFinish; j++) {
954
955 vis_write_gsr(10 << 3);
956
957 CLIP(3);
958
959 cols = xRight - xLeft + 1;
960
961 i = 0;
962
963 if (i <= cols - 4) {
964
965 NEXT_PIXEL_3BC_S16();
966 LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
967
968 NEXT_PIXEL_3BC_S16();
969
970 BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
971 FADD_3BC_S16();
972
973 BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
974
975 #pragma pipeloop(0)
976 for (; i < cols-4; i++) {
977 STORE_BC_S16_3CH_1PIXEL();
978
979 FADD_3BC_S16();
980 BC_S16_3CH(mlib_filters_table_3, mlib_filters_table_4);
981 }
982
983 STORE_BC_S16_3CH_1PIXEL();
984
985 FADD_3BC_S16();
986 STORE_BC_S16_3CH_1PIXEL();
987
988 RESULT_3BC_S16_1PIXEL();
989 STORE_BC_S16_3CH_1PIXEL();
990
991 LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
992 RESULT_3BC_S16_1PIXEL();
993 STORE_BC_S16_3CH_1PIXEL();
994 i += 4;
995 }
996
997 for (; i < cols; i++) {
998 NEXT_PIXEL_3BC_S16();
999 LOAD_BC_S16_3CH_1PIXEL(mlib_filters_table_3, mlib_filters_table_4);
1000 RESULT_3BC_S16_1PIXEL();
1001 STORE_BC_S16_3CH_1PIXEL();
1002 }
1003 }
1004
1005 return MLIB_SUCCESS;
1006 }
1007
1008 /***************************************************************/
/*
 * NEXT_PIXEL_4BC_S16 - point sPtr at the first source sample needed for
 * the pixel at the current (X, Y).
 *
 *   xSrc = (X >> MLIB_SHIFT) - 1
 *   ySrc = (Y >> MLIB_SHIFT) - 1
 *   sPtr = row lineAddr[ySrc], viewed as mlib_s16, plus xSrc * 4 elements
 *          (4 mlib_s16 per pixel, written as xSrc << 2).
 *
 * The "- 1" makes sPtr the corner of the 4-row by 4-pixel window that
 * LOAD_BC_S16_4CH_1PIXEL / BC_S16_4CH read starting at sPtr.
 * MLIB_SHIFT is defined elsewhere; presumably it is the number of
 * fraction bits in X and Y - confirm in mlib_ImageAffine.h.
 *
 * Reads:  X, Y, lineAddr.   Writes: xSrc, ySrc, sPtr.
 * Expands without a trailing semicolon.
 */
1009 #define NEXT_PIXEL_4BC_S16() \
1010 xSrc = (X >> MLIB_SHIFT)-1; \
1011 ySrc = (Y >> MLIB_SHIFT)-1; \
1012 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2)
1013
1014 /***************************************************************/
/*
 * LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) - fetch everything needed to
 * filter one 4-channel pixel.
 *
 * mlib_filters_s16_4 - coefficient table; it is addressed in bytes at
 *                      filterposy*4 and at filterposx*4, and four
 *                      consecutive mlib_d64 values are read from each
 *                      position.
 *
 * Source window: starting at sPtr, four image rows are visited
 * (sPtr += srcYStride after each of the first three). For every row,
 * vis_alignaddr(sPtr, 0) yields the mlib_d64 pointer dpSrc and programs
 * the alignment offset, five words data0..data4 are loaded from dpSrc,
 * and four vis_faligndata() calls on neighbouring words produce
 * rowR0..rowR3 (R = row 0..3), 8 bytes each.
 *
 * Coefficients: filterposy / filterposx are (Y or X) >> FILTER_SHIFT
 * masked with FILTER_MASK; yFilter0..3 and xFilter0..3 are loaded from
 * the table at those positions.
 *
 * Finally X and Y are stepped by dX and dY.
 *
 * Reads:  sPtr, srcYStride, X, Y, dX, dY.
 * Writes: dpSrc, data0..data4, row00..row33, filterposx, filterposy,
 *         xPtr, yPtr, xFilter0..3, yFilter0..3, X, Y, the alignment
 *         offset, and sPtr (advanced by 3 * srcYStride).
 *
 * NOTE(review): data4 is loaded unconditionally, so each row reads five
 * mlib_d64 words beginning at dpSrc even if sPtr is already 8-byte
 * aligned - confirm the source buffers tolerate that.
 *
 * Expands without a trailing semicolon.
 */
1015 #define LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) \
1016 dpSrc = vis_alignaddr(sPtr, 0); \
1017 data0 = dpSrc[0]; \
1018 data1 = dpSrc[1]; \
1019 data2 = dpSrc[2]; \
1020 data3 = dpSrc[3]; \
1021 data4 = dpSrc[4]; \
1022 row00 = vis_faligndata(data0, data1); \
1023 row01 = vis_faligndata(data1, data2); \
1024 row02 = vis_faligndata(data2, data3); \
1025 row03 = vis_faligndata(data3, data4); \
1026 sPtr += srcYStride; \
1027 dpSrc = vis_alignaddr(sPtr, 0); \
1028 data0 = dpSrc[0]; \
1029 data1 = dpSrc[1]; \
1030 data2 = dpSrc[2]; \
1031 data3 = dpSrc[3]; \
1032 data4 = dpSrc[4]; \
1033 row10 = vis_faligndata(data0, data1); \
1034 row11 = vis_faligndata(data1, data2); \
1035 row12 = vis_faligndata(data2, data3); \
1036 row13 = vis_faligndata(data3, data4); \
1037 sPtr += srcYStride; \
1038 dpSrc = vis_alignaddr(sPtr, 0); \
1039 data0 = dpSrc[0]; \
1040 data1 = dpSrc[1]; \
1041 data2 = dpSrc[2]; \
1042 data3 = dpSrc[3]; \
1043 data4 = dpSrc[4]; \
1044 row20 = vis_faligndata(data0, data1); \
1045 row21 = vis_faligndata(data1, data2); \
1046 row22 = vis_faligndata(data2, data3); \
1047 row23 = vis_faligndata(data3, data4); \
1048 sPtr += srcYStride; \
1049 dpSrc = vis_alignaddr(sPtr, 0); \
1050 data0 = dpSrc[0]; \
1051 data1 = dpSrc[1]; \
1052 data2 = dpSrc[2]; \
1053 data3 = dpSrc[3]; \
1054 data4 = dpSrc[4]; \
1055 row30 = vis_faligndata(data0, data1); \
1056 row31 = vis_faligndata(data1, data2); \
1057 row32 = vis_faligndata(data2, data3); \
1058 row33 = vis_faligndata(data3, data4); \
1059 filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
1060 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
1061 yFilter0 = yPtr[0]; \
1062 yFilter1 = yPtr[1]; \
1063 yFilter2 = yPtr[2]; \
1064 yFilter3 = yPtr[3]; \
1065 filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
1066 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \
1067 xFilter0 = xPtr[0]; \
1068 xFilter1 = xPtr[1]; \
1069 xFilter2 = xPtr[2]; \
1070 xFilter3 = xPtr[3]; \
1071 X += dX; \
1072 Y += dY
1073
1074 /***************************************************************/
/*
 * RESULT_4BC_S16_1PIXEL - filter the currently loaded 4x4 window and
 * leave the packed pixel in res.
 *
 * Every product a*b below is formed as
 *     vis_fpadd16(vis_fmul8sux16(a, b), vis_fmul8ulx16(a, b))
 * with the two halves held in a uXX pair before being summed.
 *
 * Vertical step, for each column C = 0..3:
 *     sumC = row0C*yFilter0 + row1C*yFilter1
 *          + row2C*yFilter2 + row3C*yFilter3
 * Horizontal step:
 *     d0 = sum0*xFilter0 + sum1*xFilter1 + sum2*xFilter2 + sum3*xFilter3
 * Output step: the high and low halves of d0 are passed through
 * vis_fmuld8sux16() with f_x01000100 and packed by vis_fpackfix_pair()
 * into res.
 *
 * The statements of the three steps are interleaved and the uXX/vXX
 * temporaries are reused, so the statement order must be kept as is
 * (the interleaving is presumably there for instruction scheduling).
 *
 * Reads:  row00..row33, yFilter0..3, xFilter0..3, f_x01000100.
 * Writes: u00..u31, v00..v33, sum0..sum3, d0..d3, res.
 * X, Y and sPtr are not modified.
 *
 * Expands without a trailing semicolon.
 */
1075 #define RESULT_4BC_S16_1PIXEL() \
1076 u00 = vis_fmul8sux16(row00, yFilter0); \
1077 u01 = vis_fmul8ulx16(row00, yFilter0); \
1078 u10 = vis_fmul8sux16(row01, yFilter0); \
1079 u11 = vis_fmul8ulx16(row01, yFilter0); \
1080 v00 = vis_fpadd16(u00, u01); \
1081 u20 = vis_fmul8sux16(row02, yFilter0); \
1082 v01 = vis_fpadd16(u10, u11); \
1083 u21 = vis_fmul8ulx16(row02, yFilter0); \
1084 u30 = vis_fmul8sux16(row03, yFilter0); \
1085 u31 = vis_fmul8ulx16(row03, yFilter0); \
1086 v02 = vis_fpadd16(u20, u21); \
1087 u00 = vis_fmul8sux16(row10, yFilter1); \
1088 u01 = vis_fmul8ulx16(row10, yFilter1); \
1089 v03 = vis_fpadd16(u30, u31); \
1090 u10 = vis_fmul8sux16(row11, yFilter1); \
1091 u11 = vis_fmul8ulx16(row11, yFilter1); \
1092 v10 = vis_fpadd16(u00, u01); \
1093 u20 = vis_fmul8sux16(row12, yFilter1); \
1094 v11 = vis_fpadd16(u10, u11); \
1095 u21 = vis_fmul8ulx16(row12, yFilter1); \
1096 u30 = vis_fmul8sux16(row13, yFilter1); \
1097 u31 = vis_fmul8ulx16(row13, yFilter1); \
1098 u00 = vis_fmul8sux16(row20, yFilter2); \
1099 v12 = vis_fpadd16(u20, u21); \
1100 u01 = vis_fmul8ulx16(row20, yFilter2); \
1101 v13 = vis_fpadd16(u30, u31); \
1102 u10 = vis_fmul8sux16(row21, yFilter2); \
1103 u11 = vis_fmul8ulx16(row21, yFilter2); \
1104 v20 = vis_fpadd16(u00, u01); \
1105 u20 = vis_fmul8sux16(row22, yFilter2); \
1106 sum0 = vis_fpadd16(v00, v10); \
1107 u21 = vis_fmul8ulx16(row22, yFilter2); \
1108 u30 = vis_fmul8sux16(row23, yFilter2); \
1109 u31 = vis_fmul8ulx16(row23, yFilter2); \
1110 u00 = vis_fmul8sux16(row30, yFilter3); \
1111 u01 = vis_fmul8ulx16(row30, yFilter3); \
1112 v21 = vis_fpadd16(u10, u11); \
1113 sum1 = vis_fpadd16(v01, v11); \
1114 u10 = vis_fmul8sux16(row31, yFilter3); \
1115 sum2 = vis_fpadd16(v02, v12); \
1116 sum3 = vis_fpadd16(v03, v13); \
1117 v22 = vis_fpadd16(u20, u21); \
1118 u11 = vis_fmul8ulx16(row31, yFilter3); \
1119 sum0 = vis_fpadd16(sum0, v20); \
1120 u20 = vis_fmul8sux16(row32, yFilter3); \
1121 u21 = vis_fmul8ulx16(row32, yFilter3); \
1122 v23 = vis_fpadd16(u30, u31); \
1123 v30 = vis_fpadd16(u00, u01); \
1124 sum1 = vis_fpadd16(sum1, v21); \
1125 u30 = vis_fmul8sux16(row33, yFilter3); \
1126 u31 = vis_fmul8ulx16(row33, yFilter3); \
1127 v31 = vis_fpadd16(u10, u11); \
1128 sum2 = vis_fpadd16(sum2, v22); \
1129 sum3 = vis_fpadd16(sum3, v23); \
1130 v32 = vis_fpadd16(u20, u21); \
1131 sum0 = vis_fpadd16(sum0, v30); \
1132 v33 = vis_fpadd16(u30, u31); \
1133 v00 = vis_fmul8sux16(sum0, xFilter0); \
1134 sum1 = vis_fpadd16(sum1, v31); \
1135 sum2 = vis_fpadd16(sum2, v32); \
1136 v01 = vis_fmul8ulx16(sum0, xFilter0); \
1137 v10 = vis_fmul8sux16(sum1, xFilter1); \
1138 sum3 = vis_fpadd16(sum3, v33); \
1139 v11 = vis_fmul8ulx16(sum1, xFilter1); \
1140 d0 = vis_fpadd16(v00, v01); \
1141 v20 = vis_fmul8sux16(sum2, xFilter2); \
1142 v21 = vis_fmul8ulx16(sum2, xFilter2); \
1143 d1 = vis_fpadd16(v10, v11); \
1144 v30 = vis_fmul8sux16(sum3, xFilter3); \
1145 v31 = vis_fmul8ulx16(sum3, xFilter3); \
1146 d2 = vis_fpadd16(v20, v21); \
1147 d3 = vis_fpadd16(v30, v31); \
1148 d0 = vis_fpadd16(d0, d1); \
1149 d2 = vis_fpadd16(d2, d3); \
1150 d0 = vis_fpadd16(d0, d2); \
1151 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
1152 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
1153 res = vis_fpackfix_pair(d2, d3)
1154
1155 /***************************************************************/
/*
 * BC_S16_4CH(mlib_filters_s16_4) - one software-pipelined step for
 * 4-channel pixels. Three pieces of work for three consecutive pixels
 * are interleaved in a single statement sequence:
 *
 *   (a) Pixel whose rows/filters are currently loaded: the same vertical
 *       and horizontal products as RESULT_4BC_S16_1PIXEL are computed,
 *       but the macro stops at the four partial sums d0..d3. The final
 *       add/pack is left to FADD_4BC_S16.
 *   (b) Pixel sPtr points at on entry: its four rows are loaded into
 *       row00..row33 (vis_alignaddr + five loads + four vis_faligndata
 *       per row, sPtr += srcYStride between rows) and its coefficients
 *       are fetched into yFilter0..3 / xFilter0..3. filterposy and
 *       filterposx are taken from Y and X before "Y += dY" / "X += dX"
 *       and masked with FILTER_MASK later on.
 *   (c) The pixel after that: once X and Y have been stepped, xSrc, ySrc
 *       and finally sPtr are recomputed exactly as NEXT_PIXEL_4BC_S16
 *       does.
 *
 * mlib_filters_s16_4 - coefficient table, addressed in bytes at
 *                      filterposy*4 and filterposx*4.
 *
 * Ordering constraint: every rowXX, yFilterN and xFilterN is overwritten
 * with the data for (b) only after its last use in (a), and the uXX/vXX
 * temporaries are reused several times. Do not reorder statements
 * without re-checking those dependencies.
 *
 * Side effect: vis_alignaddr(sPtr, 0) reprograms the alignment offset,
 * which is why the callers reissue vis_alignaddr() before rotating res
 * for the store.
 *
 * Preconditions (from the call sites): rows and filters hold a loaded
 * pixel, and sPtr already addresses the following pixel's window.
 *
 * Expands without a trailing semicolon.
 */
1156 #define BC_S16_4CH(mlib_filters_s16_4) \
1157 u00 = vis_fmul8sux16(row00, yFilter0); \
1158 u01 = vis_fmul8ulx16(row00, yFilter0); \
1159 u10 = vis_fmul8sux16(row01, yFilter0); \
1160 u11 = vis_fmul8ulx16(row01, yFilter0); \
1161 v00 = vis_fpadd16(u00, u01); \
1162 u20 = vis_fmul8sux16(row02, yFilter0); \
1163 v01 = vis_fpadd16(u10, u11); \
1164 u21 = vis_fmul8ulx16(row02, yFilter0); \
1165 u30 = vis_fmul8sux16(row03, yFilter0); \
1166 u31 = vis_fmul8ulx16(row03, yFilter0); \
1167 v02 = vis_fpadd16(u20, u21); \
1168 dpSrc = vis_alignaddr(sPtr, 0); \
1169 u00 = vis_fmul8sux16(row10, yFilter1); \
1170 u01 = vis_fmul8ulx16(row10, yFilter1); \
1171 data0 = dpSrc[0]; \
1172 filterposy = (Y >> FILTER_SHIFT); \
1173 v03 = vis_fpadd16(u30, u31); \
1174 data1 = dpSrc[1]; \
1175 u10 = vis_fmul8sux16(row11, yFilter1); \
1176 data2 = dpSrc[2]; \
1177 u11 = vis_fmul8ulx16(row11, yFilter1); \
1178 v10 = vis_fpadd16(u00, u01); \
1179 data3 = dpSrc[3]; \
1180 u20 = vis_fmul8sux16(row12, yFilter1); \
1181 v11 = vis_fpadd16(u10, u11); \
1182 data4 = dpSrc[4]; \
1183 u21 = vis_fmul8ulx16(row12, yFilter1); \
1184 row00 = vis_faligndata(data0, data1); \
1185 u30 = vis_fmul8sux16(row13, yFilter1); \
1186 row01 = vis_faligndata(data1, data2); \
1187 u31 = vis_fmul8ulx16(row13, yFilter1); \
1188 row02 = vis_faligndata(data2, data3); \
1189 u00 = vis_fmul8sux16(row20, yFilter2); \
1190 row03 = vis_faligndata(data3, data4); \
1191 filterposx = (X >> FILTER_SHIFT); \
1192 sPtr += srcYStride; \
1193 v12 = vis_fpadd16(u20, u21); \
1194 dpSrc = vis_alignaddr(sPtr, 0); \
1195 u01 = vis_fmul8ulx16(row20, yFilter2); \
1196 v13 = vis_fpadd16(u30, u31); \
1197 data0 = dpSrc[0]; \
1198 u10 = vis_fmul8sux16(row21, yFilter2); \
1199 X += dX; \
1200 data1 = dpSrc[1]; \
1201 u11 = vis_fmul8ulx16(row21, yFilter2); \
1202 v20 = vis_fpadd16(u00, u01); \
1203 data2 = dpSrc[2]; \
1204 u20 = vis_fmul8sux16(row22, yFilter2); \
1205 sum0 = vis_fpadd16(v00, v10); \
1206 data3 = dpSrc[3]; \
1207 u21 = vis_fmul8ulx16(row22, yFilter2); \
1208 data4 = dpSrc[4]; \
1209 row10 = vis_faligndata(data0, data1); \
1210 u30 = vis_fmul8sux16(row23, yFilter2); \
1211 row11 = vis_faligndata(data1, data2); \
1212 u31 = vis_fmul8ulx16(row23, yFilter2); \
1213 row12 = vis_faligndata(data2, data3); \
1214 u00 = vis_fmul8sux16(row30, yFilter3); \
1215 row13 = vis_faligndata(data3, data4); \
1216 sPtr += srcYStride; \
1217 dpSrc = vis_alignaddr(sPtr, 0); \
1218 u01 = vis_fmul8ulx16(row30, yFilter3); \
1219 v21 = vis_fpadd16(u10, u11); \
1220 Y += dY; \
1221 xSrc = (X >> MLIB_SHIFT)-1; \
1222 sum1 = vis_fpadd16(v01, v11); \
1223 data0 = dpSrc[0]; \
1224 u10 = vis_fmul8sux16(row31, yFilter3); \
1225 sum2 = vis_fpadd16(v02, v12); \
1226 sum3 = vis_fpadd16(v03, v13); \
1227 ySrc = (Y >> MLIB_SHIFT)-1; \
1228 data1 = dpSrc[1]; \
1229 v22 = vis_fpadd16(u20, u21); \
1230 u11 = vis_fmul8ulx16(row31, yFilter3); \
1231 data2 = dpSrc[2]; \
1232 sum0 = vis_fpadd16(sum0, v20); \
1233 u20 = vis_fmul8sux16(row32, yFilter3); \
1234 data3 = dpSrc[3]; \
1235 u21 = vis_fmul8ulx16(row32, yFilter3); \
1236 v23 = vis_fpadd16(u30, u31); \
1237 data4 = dpSrc[4]; \
1238 v30 = vis_fpadd16(u00, u01); \
1239 filterposy &= FILTER_MASK; \
1240 row20 = vis_faligndata(data0, data1); \
1241 sum1 = vis_fpadd16(sum1, v21); \
1242 u30 = vis_fmul8sux16(row33, yFilter3); \
1243 row21 = vis_faligndata(data1, data2); \
1244 u31 = vis_fmul8ulx16(row33, yFilter3); \
1245 row22 = vis_faligndata(data2, data3); \
1246 row23 = vis_faligndata(data3, data4); \
1247 sPtr += srcYStride; \
1248 filterposx &= FILTER_MASK; \
1249 v31 = vis_fpadd16(u10, u11); \
1250 dpSrc = vis_alignaddr(sPtr, 0); \
1251 data0 = dpSrc[0]; \
1252 sum2 = vis_fpadd16(sum2, v22); \
1253 sum3 = vis_fpadd16(sum3, v23); \
1254 data1 = dpSrc[1]; \
1255 v32 = vis_fpadd16(u20, u21); \
1256 data2 = dpSrc[2]; \
1257 sum0 = vis_fpadd16(sum0, v30); \
1258 data3 = dpSrc[3]; \
1259 v33 = vis_fpadd16(u30, u31); \
1260 data4 = dpSrc[4]; \
1261 row30 = vis_faligndata(data0, data1); \
1262 v00 = vis_fmul8sux16(sum0, xFilter0); \
1263 row31 = vis_faligndata(data1, data2); \
1264 row32 = vis_faligndata(data2, data3); \
1265 row33 = vis_faligndata(data3, data4); \
1266 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
1267 sum1 = vis_fpadd16(sum1, v31); \
1268 yFilter0 = yPtr[0]; \
1269 sum2 = vis_fpadd16(sum2, v32); \
1270 v01 = vis_fmul8ulx16(sum0, xFilter0); \
1271 yFilter1 = yPtr[1]; \
1272 v10 = vis_fmul8sux16(sum1, xFilter1); \
1273 sum3 = vis_fpadd16(sum3, v33); \
1274 yFilter2 = yPtr[2]; \
1275 v11 = vis_fmul8ulx16(sum1, xFilter1); \
1276 d0 = vis_fpadd16(v00, v01); \
1277 yFilter3 = yPtr[3]; \
1278 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \
1279 v20 = vis_fmul8sux16(sum2, xFilter2); \
1280 xFilter0 = xPtr[0]; \
1281 v21 = vis_fmul8ulx16(sum2, xFilter2); \
1282 d1 = vis_fpadd16(v10, v11); \
1283 xFilter1 = xPtr[1]; \
1284 v30 = vis_fmul8sux16(sum3, xFilter3); \
1285 v31 = vis_fmul8ulx16(sum3, xFilter3); \
1286 d2 = vis_fpadd16(v20, v21); \
1287 xFilter2 = xPtr[2]; \
1288 d3 = vis_fpadd16(v30, v31); \
1289 xFilter3 = xPtr[3]; \
1290 sPtr = (mlib_s16 *)lineAddr[ySrc] + (xSrc << 2)
1291
1292 /***************************************************************/
/*
 * FADD_4BC_S16 - final reduction for one 4-channel pixel.
 *
 * Adds the four x-filtered partial sums left by BC_S16_4CH
 * (d0 + d1 + d2 + d3, accumulated in d0), passes the high and low halves
 * of d0 through vis_fmuld8sux16() with f_x01000100 and packs the two
 * results into res with vis_fpackfix_pair(). These are the same six
 * statements that end RESULT_4BC_S16_1PIXEL.
 *
 * Reads:  d0, d1, d2, d3, f_x01000100.
 * Writes: d0, d2, d3, res.
 *
 * Because d0..d3 are consumed and clobbered, the macro has to run before
 * the next BC_S16_4CH overwrites them.
 *
 * Expands without a trailing semicolon.
 */
1293 #define FADD_4BC_S16() \
1294 d0 = vis_fpadd16(d0, d1); \
1295 d2 = vis_fpadd16(d2, d3); \
1296 d0 = vis_fpadd16(d0, d2); \
1297 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
1298 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
1299 res = vis_fpackfix_pair(d2, d3)
1300
1301 /***************************************************************/
mlib_ImageAffine_s16_4ch_bc(mlib_affine_param * param)1302 mlib_status mlib_ImageAffine_s16_4ch_bc (mlib_affine_param *param)
1303 {
1304 DECLAREVAR_BC();
1305 DTYPE *dstLineEnd;
1306 mlib_s32 filterposx, filterposy;
1307 mlib_d64 data0, data1, data2, data3, data4;
1308 mlib_d64 sum0, sum1, sum2, sum3;
1309 mlib_d64 row00, row10, row20, row30;
1310 mlib_d64 row01, row11, row21, row31;
1311 mlib_d64 row02, row12, row22, row32;
1312 mlib_d64 row03, row13, row23, row33;
1313 mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3;
1314 mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3;
1315 mlib_d64 v00, v01, v02, v03, v10, v11, v12, v13;
1316 mlib_d64 v20, v21, v22, v23, v30, v31, v32, v33;
1317 mlib_d64 u00, u01, u10, u11, u20, u21, u30, u31;
1318 mlib_d64 d0, d1, d2, d3;
1319 mlib_d64 *yPtr, *xPtr;
1320 mlib_d64 *dp, *dpSrc;
1321 mlib_s32 cols, i, mask, gsrd;
1322 mlib_d64 res;
1323 mlib_f32 f_x01000100 = vis_to_float(0x01000100);
1324 const mlib_s16 *mlib_filters_table_4;
1325
1326 if (filter == MLIB_BICUBIC) {
1327 mlib_filters_table_4 = mlib_filters_s16_bc_4;
1328 } else {
1329 mlib_filters_table_4 = mlib_filters_s16_bc2_4;
1330 }
1331
1332 srcYStride >>= 1;
1333
1334 for (j = yStart; j <= yFinish; j++) {
1335
1336 vis_write_gsr(10 << 3);
1337
1338 CLIP(4);
1339 dstLineEnd = (DTYPE*)dstData + 4 * xRight;
1340
1341 cols = xRight - xLeft + 1;
1342 dp = vis_alignaddr(dstPixelPtr, 0);
1343 dstLineEnd += 3;
1344 mask = vis_edge16(dstPixelPtr, dstLineEnd);
1345 gsrd = ((8 - (mlib_addr)dstPixelPtr) & 7);
1346
1347 i = 0;
1348
1349 if (i <= cols - 4) {
1350
1351 NEXT_PIXEL_4BC_S16();
1352 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4);
1353
1354 NEXT_PIXEL_4BC_S16();
1355
1356 BC_S16_4CH(mlib_filters_table_4);
1357 FADD_4BC_S16();
1358
1359 BC_S16_4CH(mlib_filters_table_4);
1360
1361 #pragma pipeloop(0)
1362 for (; i < cols-4; i++) {
1363 vis_alignaddr((void *)gsrd, 0);
1364 res = vis_faligndata(res, res);
1365
1366 vis_pst_16(res, dp++, mask);
1367 vis_pst_16(res, dp, ~mask);
1368
1369 FADD_4BC_S16();
1370 BC_S16_4CH(mlib_filters_table_4);
1371 }
1372
1373 vis_alignaddr((void *)gsrd, 0);
1374 res = vis_faligndata(res, res);
1375 vis_pst_16(res, dp++, mask);
1376 vis_pst_16(res, dp, ~mask);
1377
1378 FADD_4BC_S16();
1379 vis_alignaddr((void *)gsrd, 0);
1380 res = vis_faligndata(res, res);
1381 vis_pst_16(res, dp++, mask);
1382 vis_pst_16(res, dp, ~mask);
1383
1384 RESULT_4BC_S16_1PIXEL();
1385 vis_alignaddr((void *)gsrd, 0);
1386 res = vis_faligndata(res, res);
1387 vis_pst_16(res, dp++, mask);
1388 vis_pst_16(res, dp, ~mask);
1389
1390 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4);
1391 RESULT_4BC_S16_1PIXEL();
1392 vis_alignaddr((void *)gsrd, 0);
1393 res = vis_faligndata(res, res);
1394 vis_pst_16(res, dp++, mask);
1395 vis_pst_16(res, dp, ~mask);
1396 i += 4;
1397 }
1398
1399 #pragma pipeloop(0)
1400 for (; i < cols; i++) {
1401 NEXT_PIXEL_4BC_S16();
1402 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_4);
1403 RESULT_4BC_S16_1PIXEL();
1404 vis_alignaddr((void *)gsrd, 0);
1405 res = vis_faligndata(res, res);
1406 vis_pst_16(res, dp++, mask);
1407 vis_pst_16(res, dp, ~mask);
1408 }
1409 }
1410
1411 return MLIB_SUCCESS;
1412 }
1413
1414 /***************************************************************/
1415