1 /*
2 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27 /*
28 * FUNCTION
29 * Internal functions for mlib_ImageConv* on S32 type and
30 * MLIB_EDGE_DST_NO_WRITE mask
31 *
32 */
33
34 #include "mlib_image.h"
35 #include "mlib_ImageConv.h"
36
37 /***************************************************************/
38 #define BUFF_LINE 256
39
40 #define CACHE_SIZE (64*1024)
41
42 /***************************************************************/
43 #define CONV_FUNC(KERN) mlib_conv##KERN##nw_s32
44
45 /***************************************************************/
#ifndef MLIB_USE_FTOI_CLAMPING

/*
 * Store the floating-point value `src` into `dst` as an mlib_s32,
 * saturating it to [MLIB_S32_MIN, MLIB_S32_MAX] first.
 *
 * `src` must be a modifiable lvalue: it is clamped in place before the
 * conversion, and it is evaluated more than once, so it must not have
 * side effects.  The body is wrapped in do { } while (0) so that the
 * macro acts as a single statement (safe under an unbraced if/else).
 */
#define CLAMP_S32(dst, src)                                             \
  do {                                                                  \
    if ((src) > (mlib_d64)MLIB_S32_MAX) (src) = (mlib_d64)MLIB_S32_MAX; \
    if ((src) < (mlib_d64)MLIB_S32_MIN) (src) = (mlib_d64)MLIB_S32_MIN; \
    (dst) = (mlib_s32)(src);                                            \
  } while (0)

#else

/*
 * Plain conversion without explicit range checks; presumably selected on
 * platforms whose float-to-int conversion already saturates (TODO confirm).
 */
#define CLAMP_S32(dst, src)                                             \
  do {                                                                  \
    (dst) = (mlib_s32)(src);                                            \
  } while (0)

#endif /* MLIB_USE_FTOI_CLAMPING */
58
59 /***************************************************************/
/*
 * Declares, in the enclosing scope, the image geometry and data pointers
 * used by the convolution routines.  Identifiers `src` and `dst` must
 * already be in scope.  Variables introduced:
 *   hgt, wid         - height and width, read from src
 *   sll, dll         - stride of src / dst divided by sizeof(type), i.e.
 *                      the line step expressed in elements of `type`
 *                      (presumably the stride is in bytes - TODO confirm)
 *   adr_src, adr_dst - data pointers of src / dst viewed as `type *`
 *   chan1            - channel count, read from src
 * Size and channel count are taken from src only; dst is not queried for
 * them here.
 * The expansion deliberately has no trailing semicolon.
 */
#define GET_SRC_DST_PARAMETERS(type) \
  mlib_s32 hgt = mlib_ImageGetHeight(src); \
  mlib_s32 wid = mlib_ImageGetWidth(src); \
  mlib_s32 sll = mlib_ImageGetStride(src) / sizeof(type); \
  mlib_s32 dll = mlib_ImageGetStride(dst) / sizeof(type); \
  type* adr_src = mlib_ImageGetData(src); \
  type* adr_dst = mlib_ImageGetData(dst); \
  mlib_s32 chan1 = mlib_ImageGetChannels(src)
/* mlib_s32 chan2 = chan1 + chan1 */
69
70 /***************************************************************/
/*
 * Declares the common locals of the fixed-size KxK routines.  Requires a
 * local array named `buff` to be declared before the macro is used.
 * On top of everything GET_SRC_DST_PARAMETERS(type) declares, it adds:
 *   sl, sp, sl1, dl, dp  - source / destination line and pixel cursors
 *   pbuff                - scratch buffer pointer, initialised to `buff`
 *   buff0, buff1, buff2  - line buffer pointers; buffT - temporary used
 *                          when the line buffers are rotated
 *   i, j, c              - loop counters
 *   scalef, d0, d1       - scale factor and result accumulators
 */
#define DEF_VARS(type) \
  GET_SRC_DST_PARAMETERS(type); \
  type *sl, *sp, *sl1, *dl, *dp; \
  mlib_d64 *pbuff = buff, *buff0, *buff1, *buff2, *buffT; \
  mlib_s32 i, j, c; \
  mlib_d64 scalef, d0, d1
77
78 /***************************************************************/
/*
 * Reduced variant of DEF_VARS for routines that manage their own line
 * buffers.  Requires a local array named `buff` to be declared first.
 * On top of everything GET_SRC_DST_PARAMETERS(type) declares, it adds:
 *   sl, dl   - source / destination line cursors
 *   sp, dp   - pixel cursors, initialised to NULL
 *   pbuff    - scratch buffer pointer, initialised to `buff`
 *   i, j, c  - loop counters
 * Unlike DEF_VARS it declares no scalef, d0, d1 or buffN variables.
 */
#define DEF_VARS_MxN(type) \
  GET_SRC_DST_PARAMETERS(type); \
  type *sl, *sp = NULL, *dl, *dp = NULL; \
  mlib_d64 *pbuff = buff; \
  mlib_s32 i, j, c
84
85 /***************************************************************/
/*
 * Sets `scalef` to 2^(-scalef_expon).  Both identifiers are taken from the
 * enclosing scope; `scalef_expon` is modified (reduced in steps of 30) so
 * that every shift stays within the range of a 32-bit int.
 * Wrapped in do { } while (0) so the macro acts as a single statement.
 */
#define CALC_SCALE()                  \
  do {                                \
    scalef = 1.0;                     \
    while (scalef_expon > 30) {       \
      scalef /= (1 << 30);            \
      scalef_expon -= 30;             \
    }                                 \
                                      \
    scalef /= (1 << scalef_expon);    \
  } while (0)
94
95 /***************************************************************/
96 #undef KSIZE
97 #define KSIZE 2
98
99 mlib_status CONV_FUNC(2x2)(mlib_image *dst,
100 const mlib_image *src,
101 const mlib_s32 *kern,
102 mlib_s32 scalef_expon,
103 mlib_s32 cmask)
104 {
105 mlib_d64 buff[(KSIZE + 1)*BUFF_LINE];
106 mlib_d64 k0, k1, k2, k3;
107 mlib_d64 p00, p01, p02, p03,
108 p10, p11, p12, p13;
109 mlib_d64 d2;
110 DEF_VARS(mlib_s32);
111 mlib_s32 chan2 = chan1 + chan1;
112 mlib_s32 chan3 = chan1 + chan2;
113
114 if (wid > BUFF_LINE) {
115 pbuff = mlib_malloc((KSIZE + 1)*sizeof(mlib_d64)*wid);
116
117 if (pbuff == NULL) return MLIB_FAILURE;
118 }
119
120 buff0 = pbuff;
121 buff1 = buff0 + wid;
122 buff2 = buff1 + wid;
123
124 wid -= (KSIZE - 1);
125 hgt -= (KSIZE - 1);
126
127 /* keep kernel in regs */
128 CALC_SCALE();
129 k0 = scalef * kern[0]; k1 = scalef * kern[1];
130 k2 = scalef * kern[2]; k3 = scalef * kern[3];
131
132 for (c = 0; c < chan1; c++) {
133 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
134
135 sl = adr_src + c;
136 dl = adr_dst + c;
137
138 sl1 = sl + sll;
139 #ifdef __SUNPRO_C
140 #pragma pipeloop(0)
141 #endif /* __SUNPRO_C */
142 for (i = 0; i < wid + (KSIZE - 1); i++) {
143 buff0[i] = (mlib_d64)sl[i*chan1];
144 buff1[i] = (mlib_d64)sl1[i*chan1];
145 }
146
147 sl += KSIZE*sll;
148
149 for (j = 0; j < hgt; j++) {
150 p03 = buff0[0];
151 p13 = buff1[0];
152
153 sp = sl;
154 dp = dl;
155
156 #ifdef __SUNPRO_C
157 #pragma pipeloop(0)
158 #endif /* __SUNPRO_C */
159 for (i = 0; i <= (wid - 3); i += 3) {
160
161 p00 = p03; p10 = p13;
162
163 p01 = buff0[i + 1]; p11 = buff1[i + 1];
164 p02 = buff0[i + 2]; p12 = buff1[i + 2];
165 p03 = buff0[i + 3]; p13 = buff1[i + 3];
166
167 buff2[i ] = (mlib_d64)sp[0];
168 buff2[i + 1] = (mlib_d64)sp[chan1];
169 buff2[i + 2] = (mlib_d64)sp[chan2];
170
171 d0 = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
172 d1 = p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3;
173 d2 = p02 * k0 + p03 * k1 + p12 * k2 + p13 * k3;
174
175 CLAMP_S32(dp[0 ], d0);
176 CLAMP_S32(dp[chan1], d1);
177 CLAMP_S32(dp[chan2], d2);
178
179 sp += chan3;
180 dp += chan3;
181 }
182
183 for (; i < wid; i++) {
184 p00 = buff0[i]; p10 = buff1[i];
185 p01 = buff0[i + 1]; p11 = buff1[i + 1];
186
187 buff2[i] = (mlib_d64)sp[0];
188
189 d0 = p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3;
190 CLAMP_S32(dp[0], d0);
191
192 sp += chan1;
193 dp += chan1;
194 }
195
196 buff2[wid] = (mlib_d64)sp[0];
197
198 sl += sll;
199 dl += dll;
200
201 buffT = buff0;
202 buff0 = buff1;
203 buff1 = buff2;
204 buff2 = buffT;
205 }
206 }
207
208 if (pbuff != buff) mlib_free(pbuff);
209
210 return MLIB_SUCCESS;
211 }
212
213 /***************************************************************/
214 #undef KSIZE
215 #define KSIZE 3
216
217 mlib_status CONV_FUNC(3x3)(mlib_image *dst,
218 const mlib_image *src,
219 const mlib_s32 *kern,
220 mlib_s32 scalef_expon,
221 mlib_s32 cmask)
222 {
223 mlib_d64 buff[(KSIZE + 1)*BUFF_LINE], *buff3;
224 mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7, k8;
225 mlib_d64 p00, p01, p02, p03,
226 p10, p11, p12, p13,
227 p20, p21, p22, p23;
228 mlib_s32 *sl2;
229 DEF_VARS(mlib_s32);
230 mlib_s32 chan2 = chan1 + chan1;
231
232 if (wid > BUFF_LINE) {
233 pbuff = mlib_malloc((KSIZE + 1)*sizeof(mlib_d64)*wid);
234
235 if (pbuff == NULL) return MLIB_FAILURE;
236 }
237
238 buff0 = pbuff;
239 buff1 = buff0 + wid;
240 buff2 = buff1 + wid;
241 buff3 = buff2 + wid;
242
243 wid -= (KSIZE - 1);
244 hgt -= (KSIZE - 1);
245
246 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
247
248 CALC_SCALE();
249 k0 = scalef * kern[0]; k1 = scalef * kern[1]; k2 = scalef * kern[2];
250 k3 = scalef * kern[3]; k4 = scalef * kern[4]; k5 = scalef * kern[5];
251 k6 = scalef * kern[6]; k7 = scalef * kern[7]; k8 = scalef * kern[8];
252
253 for (c = 0; c < chan1; c++) {
254 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
255
256 sl = adr_src + c;
257 dl = adr_dst + c;
258
259 sl1 = sl + sll;
260 sl2 = sl1 + sll;
261 #ifdef __SUNPRO_C
262 #pragma pipeloop(0)
263 #endif /* __SUNPRO_C */
264 for (i = 0; i < wid + (KSIZE - 1); i++) {
265 buff0[i] = (mlib_d64)sl[i*chan1];
266 buff1[i] = (mlib_d64)sl1[i*chan1];
267 buff2[i] = (mlib_d64)sl2[i*chan1];
268 }
269
270 sl += KSIZE*sll;
271
272 for (j = 0; j < hgt; j++) {
273 mlib_d64 s0, s1;
274
275 p02 = buff0[0];
276 p12 = buff1[0];
277 p22 = buff2[0];
278
279 p03 = buff0[1];
280 p13 = buff1[1];
281 p23 = buff2[1];
282
283 sp = sl;
284 dp = dl;
285
286 s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
287 s1 = p03 * k0 + p13 * k3 + p23 * k6;
288
289 #ifdef __SUNPRO_C
290 #pragma pipeloop(0)
291 #endif /* __SUNPRO_C */
292 for (i = 0; i <= (wid - 2); i += 2) {
293 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
294 p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3];
295
296 buff3[i ] = (mlib_d64)sp[0];
297 buff3[i + 1] = (mlib_d64)sp[chan1];
298
299 d0 = s0 + p02 * k2 + p12 * k5 + p22 * k8;
300 d1 = s1 + p02 * k1 + p03 * k2 + p12 * k4 + p13 * k5 + p22 * k7 + p23 * k8;
301
302 CLAMP_S32(dp[0 ], d0);
303 CLAMP_S32(dp[chan1], d1);
304
305 s0 = p02 * k0 + p03 * k1 + p12 * k3 + p13 * k4 + p22 * k6 + p23 * k7;
306 s1 = p03 * k0 + p13 * k3 + p23 * k6;
307
308 sp += chan2;
309 dp += chan2;
310 }
311
312 for (; i < wid; i++) {
313 p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i];
314 p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1];
315 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2];
316
317 buff3[i] = (mlib_d64)sp[0];
318
319 d0 = (p00 * k0 + p01 * k1 + p02 * k2 + p10 * k3 + p11 * k4 +
320 p12 * k5 + p20 * k6 + p21 * k7 + p22 * k8);
321
322 CLAMP_S32(dp[0], d0);
323
324 sp += chan1;
325 dp += chan1;
326 }
327
328 buff3[wid ] = (mlib_d64)sp[0];
329 buff3[wid + 1] = (mlib_d64)sp[chan1];
330
331 sl += sll;
332 dl += dll;
333
334 buffT = buff0;
335 buff0 = buff1;
336 buff1 = buff2;
337 buff2 = buff3;
338 buff3 = buffT;
339 }
340 }
341
342 if (pbuff != buff) mlib_free(pbuff);
343
344 return MLIB_SUCCESS;
345 }
346
347 /***************************************************************/
348 #undef KSIZE
349 #define KSIZE 4
350
351 mlib_status CONV_FUNC(4x4)(mlib_image *dst,
352 const mlib_image *src,
353 const mlib_s32 *kern,
354 mlib_s32 scalef_expon,
355 mlib_s32 cmask)
356 {
357 mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buff3, *buff4, *buff5;
358 mlib_d64 k[KSIZE*KSIZE];
359 mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7;
360 mlib_d64 p00, p01, p02, p03, p04,
361 p10, p11, p12, p13, p14,
362 p20, p21, p22, p23,
363 p30, p31, p32, p33;
364 mlib_s32 *sl2, *sl3;
365 DEF_VARS(mlib_s32);
366 mlib_s32 chan2 = chan1 + chan1;
367
368 if (wid > BUFF_LINE) {
369 pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid);
370
371 if (pbuff == NULL) return MLIB_FAILURE;
372 }
373
374 buff0 = pbuff;
375 buff1 = buff0 + wid;
376 buff2 = buff1 + wid;
377 buff3 = buff2 + wid;
378 buff4 = buff3 + wid;
379 buff5 = buff4 + wid;
380
381 wid -= (KSIZE - 1);
382 hgt -= (KSIZE - 1);
383
384 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
385
386 CALC_SCALE();
387 for (j = 0; j < 16; j++) k[j] = scalef * kern[j];
388
389 for (c = 0; c < chan1; c++) {
390 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
391
392 sl = adr_src + c;
393 dl = adr_dst + c;
394
395 sl1 = sl + sll;
396 sl2 = sl1 + sll;
397 sl3 = sl2 + sll;
398 #ifdef __SUNPRO_C
399 #pragma pipeloop(0)
400 #endif /* __SUNPRO_C */
401 for (i = 0; i < wid + (KSIZE - 1); i++) {
402 buff0[i] = (mlib_d64)sl[i*chan1];
403 buff1[i] = (mlib_d64)sl1[i*chan1];
404 buff2[i] = (mlib_d64)sl2[i*chan1];
405 buff3[i] = (mlib_d64)sl3[i*chan1];
406 }
407
408 sl += KSIZE*sll;
409
410 for (j = 0; j < hgt; j++) {
411 /*
412 * First loop on two first lines of kernel
413 */
414 k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
415 k4 = k[4]; k5 = k[5]; k6 = k[6]; k7 = k[7];
416
417 sp = sl;
418 dp = dl;
419
420 p02 = buff0[0];
421 p12 = buff1[0];
422 p03 = buff0[1];
423 p13 = buff1[1];
424 p04 = buff0[2];
425
426 #ifdef __SUNPRO_C
427 #pragma pipeloop(0)
428 #endif /* __SUNPRO_C */
429 for (i = 0; i <= (wid - 2); i += 2) {
430 p00 = p02; p10 = p12;
431 p01 = p03; p11 = p13;
432 p02 = p04; p12 = buff1[i + 2];
433 p03 = buff0[i + 3]; p13 = buff1[i + 3];
434 p04 = buff0[i + 4]; p14 = buff1[i + 4];
435
436 buff4[i] = (mlib_d64)sp[0];
437 buff4[i + 1] = (mlib_d64)sp[chan1];
438
439 buff5[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
440 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7);
441 buff5[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
442 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7);
443
444 sp += chan2;
445 dp += chan2;
446 }
447
448 /*
449 * Second loop on two last lines of kernel
450 */
451 k0 = k[ 8]; k1 = k[ 9]; k2 = k[10]; k3 = k[11];
452 k4 = k[12]; k5 = k[13]; k6 = k[14]; k7 = k[15];
453
454 sp = sl;
455 dp = dl;
456
457 p02 = buff2[0];
458 p12 = buff3[0];
459 p03 = buff2[1];
460 p13 = buff3[1];
461 p04 = buff2[2];
462
463 #ifdef __SUNPRO_C
464 #pragma pipeloop(0)
465 #endif /* __SUNPRO_C */
466 for (i = 0; i <= (wid - 2); i += 2) {
467 p00 = p02; p10 = p12;
468 p01 = p03; p11 = p13;
469 p02 = p04; p12 = buff3[i + 2];
470 p03 = buff2[i + 3]; p13 = buff3[i + 3];
471 p04 = buff2[i + 4]; p14 = buff3[i + 4];
472
473 d0 = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 +
474 p10 * k4 + p11 * k5 + p12 * k6 + p13 * k7 + buff5[i]);
475 d1 = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 +
476 p11 * k4 + p12 * k5 + p13 * k6 + p14 * k7 + buff5[i + 1]);
477
478 CLAMP_S32(dp[0 ], d0);
479 CLAMP_S32(dp[chan1], d1);
480
481 sp += chan2;
482 dp += chan2;
483 }
484
485 /* last pixels */
486 for (; i < wid; i++) {
487 p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i];
488 p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
489 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
490 p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
491
492 buff4[i] = (mlib_d64)sp[0];
493
494 d0 = (p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] +
495 p10 * k[4] + p11 * k[5] + p12 * k[6] + p13 * k[7] +
496 p20 * k[ 8] + p21 * k[ 9] + p22 * k[10] + p23 * k[11] +
497 p30 * k[12] + p31 * k[13] + p32 * k[14] + p33 * k[15]);
498
499 CLAMP_S32(dp[0], d0);
500
501 sp += chan1;
502 dp += chan1;
503 }
504
505 buff4[wid ] = (mlib_d64)sp[0];
506 buff4[wid + 1] = (mlib_d64)sp[chan1];
507 buff4[wid + 2] = (mlib_d64)sp[chan2];
508
509 /* next line */
510 sl += sll;
511 dl += dll;
512
513 buffT = buff0;
514 buff0 = buff1;
515 buff1 = buff2;
516 buff2 = buff3;
517 buff3 = buff4;
518 buff4 = buffT;
519 }
520 }
521
522 if (pbuff != buff) mlib_free(pbuff);
523
524 return MLIB_SUCCESS;
525 }
526
527 /***************************************************************/
528 #undef KSIZE
529 #define KSIZE 5
530
531 mlib_status CONV_FUNC(5x5)(mlib_image *dst,
532 const mlib_image *src,
533 const mlib_s32 *kern,
534 mlib_s32 scalef_expon,
535 mlib_s32 cmask)
536 {
537 mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buff3, *buff4, *buff5, *buff6;
538 mlib_d64 k[KSIZE*KSIZE];
539 mlib_d64 k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
540 mlib_d64 p00, p01, p02, p03, p04, p05,
541 p10, p11, p12, p13, p14, p15,
542 p20, p21, p22, p23, p24,
543 p30, p31, p32, p33, p34,
544 p40, p41, p42, p43, p44;
545 mlib_s32 *sl2, *sl3, *sl4;
546 DEF_VARS(mlib_s32);
547 mlib_s32 chan2 = chan1 + chan1;
548 mlib_s32 chan3 = chan1 + chan2;
549
550 if (wid > BUFF_LINE) {
551 pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid);
552
553 if (pbuff == NULL) return MLIB_FAILURE;
554 }
555
556 buff0 = pbuff;
557 buff1 = buff0 + wid;
558 buff2 = buff1 + wid;
559 buff3 = buff2 + wid;
560 buff4 = buff3 + wid;
561 buff5 = buff4 + wid;
562 buff6 = buff5 + wid;
563
564 wid -= (KSIZE - 1);
565 hgt -= (KSIZE - 1);
566
567 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
568
569 CALC_SCALE();
570 for (j = 0; j < 25; j++) k[j] = scalef * kern[j];
571
572 for (c = 0; c < chan1; c++) {
573 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
574
575 sl = adr_src + c;
576 dl = adr_dst + c;
577
578 sl1 = sl + sll;
579 sl2 = sl1 + sll;
580 sl3 = sl2 + sll;
581 sl4 = sl3 + sll;
582 #ifdef __SUNPRO_C
583 #pragma pipeloop(0)
584 #endif /* __SUNPRO_C */
585 for (i = 0; i < wid + (KSIZE - 1); i++) {
586 buff0[i] = (mlib_d64)sl[i*chan1];
587 buff1[i] = (mlib_d64)sl1[i*chan1];
588 buff2[i] = (mlib_d64)sl2[i*chan1];
589 buff3[i] = (mlib_d64)sl3[i*chan1];
590 buff4[i] = (mlib_d64)sl4[i*chan1];
591 }
592
593 sl += KSIZE*sll;
594
595 for (j = 0; j < hgt; j++) {
596 /*
597 * First loop
598 */
599 k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3]; k4 = k[4];
600 k5 = k[5]; k6 = k[6]; k7 = k[7]; k8 = k[8]; k9 = k[9];
601
602 sp = sl;
603 dp = dl;
604
605 p02 = buff0[0];
606 p12 = buff1[0];
607 p03 = buff0[1];
608 p13 = buff1[1];
609 p04 = buff0[2];
610 p14 = buff1[2];
611
612 #ifdef __SUNPRO_C
613 #pragma pipeloop(0)
614 #endif /* __SUNPRO_C */
615 for (i = 0; i <= (wid - 2); i += 2) {
616 p00 = p02; p10 = p12;
617 p01 = p03; p11 = p13;
618 p02 = p04; p12 = p14;
619
620 p03 = buff0[i + 3]; p13 = buff1[i + 3];
621 p04 = buff0[i + 4]; p14 = buff1[i + 4];
622 p05 = buff0[i + 5]; p15 = buff1[i + 5];
623
624 buff6[i ] = (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
625 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
626 buff6[i + 1] = (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
627 p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
628
629 sp += chan2;
630 dp += chan2;
631 }
632
633 /*
634 * Second loop
635 */
636 k0 = k[10]; k1 = k[11]; k2 = k[12]; k3 = k[13]; k4 = k[14];
637 k5 = k[15]; k6 = k[16]; k7 = k[17]; k8 = k[18]; k9 = k[19];
638
639 sp = sl;
640 dp = dl;
641
642 p02 = buff2[0];
643 p12 = buff3[0];
644 p03 = buff2[1];
645 p13 = buff3[1];
646
647 #ifdef __SUNPRO_C
648 #pragma pipeloop(0)
649 #endif /* __SUNPRO_C */
650 for (i = 0; i <= (wid - 2); i += 2) {
651 p00 = p02; p10 = p12;
652 p01 = p03; p11 = p13;
653
654 p02 = buff2[i + 2]; p12 = buff3[i + 2];
655 p03 = buff2[i + 3]; p13 = buff3[i + 3];
656 p04 = buff2[i + 4]; p14 = buff3[i + 4];
657 p05 = buff2[i + 5]; p15 = buff3[i + 5];
658
659 buff6[i ] += (p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 +
660 p10 * k5 + p11 * k6 + p12 * k7 + p13 * k8 + p14 * k9);
661 buff6[i + 1] += (p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 +
662 p11 * k5 + p12 * k6 + p13 * k7 + p14 * k8 + p15 * k9);
663
664 sp += chan2;
665 dp += chan2;
666 }
667
668 /*
669 * 3 loop
670 */
671 k0 = k[20]; k1 = k[21]; k2 = k[22]; k3 = k[23]; k4 = k[24];
672
673 sp = sl;
674 dp = dl;
675
676 p02 = buff4[0];
677 p03 = buff4[1];
678 p04 = buff4[2];
679 p05 = buff4[3];
680
681 #ifdef __SUNPRO_C
682 #pragma pipeloop(0)
683 #endif /* __SUNPRO_C */
684 for (i = 0; i <= (wid - 2); i += 2) {
685 p00 = p02; p01 = p03; p02 = p04; p03 = p05;
686
687 p04 = buff4[i + 4]; p05 = buff4[i + 5];
688
689 buff5[i ] = (mlib_d64)sp[0];
690 buff5[i + 1] = (mlib_d64)sp[chan1];
691
692 d0 = p00 * k0 + p01 * k1 + p02 * k2 + p03 * k3 + p04 * k4 + buff6[i];
693 d1 = p01 * k0 + p02 * k1 + p03 * k2 + p04 * k3 + p05 * k4 + buff6[i + 1];
694
695 CLAMP_S32(dp[0 ], d0);
696 CLAMP_S32(dp[chan1], d1);
697
698 sp += chan2;
699 dp += chan2;
700 }
701
702 /* last pixels */
703 for (; i < wid; i++) {
704 p00 = buff0[i]; p10 = buff1[i]; p20 = buff2[i]; p30 = buff3[i];
705 p01 = buff0[i + 1]; p11 = buff1[i + 1]; p21 = buff2[i + 1]; p31 = buff3[i + 1];
706 p02 = buff0[i + 2]; p12 = buff1[i + 2]; p22 = buff2[i + 2]; p32 = buff3[i + 2];
707 p03 = buff0[i + 3]; p13 = buff1[i + 3]; p23 = buff2[i + 3]; p33 = buff3[i + 3];
708 p04 = buff0[i + 4]; p14 = buff1[i + 4]; p24 = buff2[i + 4]; p34 = buff3[i + 4];
709
710 p40 = buff4[i]; p41 = buff4[i + 1]; p42 = buff4[i + 2];
711 p43 = buff4[i + 3]; p44 = buff4[i + 4];
712
713 buff5[i] = (mlib_d64)sp[0];
714
715 d0 = (p00 * k[0] + p01 * k[1] + p02 * k[2] + p03 * k[3] + p04 * k[4] +
716 p10 * k[5] + p11 * k[6] + p12 * k[7] + p13 * k[8] + p14 * k[9] +
717 p20 * k[10] + p21 * k[11] + p22 * k[12] + p23 * k[13] + p24 * k[14] +
718 p30 * k[15] + p31 * k[16] + p32 * k[17] + p33 * k[18] + p34 * k[19] +
719 p40 * k[20] + p41 * k[21] + p42 * k[22] + p43 * k[23] + p44 * k[24]);
720
721 CLAMP_S32(dp[0], d0);
722
723 sp += chan1;
724 dp += chan1;
725 }
726
727 buff5[wid ] = (mlib_d64)sp[0];
728 buff5[wid + 1] = (mlib_d64)sp[chan1];
729 buff5[wid + 2] = (mlib_d64)sp[chan2];
730 buff5[wid + 3] = (mlib_d64)sp[chan3];
731
732 /* next line */
733 sl += sll;
734 dl += dll;
735
736 buffT = buff0;
737 buff0 = buff1;
738 buff1 = buff2;
739 buff2 = buff3;
740 buff3 = buff4;
741 buff4 = buff5;
742 buff5 = buffT;
743 }
744 }
745
746 if (pbuff != buff) mlib_free(pbuff);
747
748 return MLIB_SUCCESS;
749 }
750
751 /***************************************************************/
752 #undef KSIZE
753 #define KSIZE 7
754
755 mlib_status CONV_FUNC(7x7)(mlib_image *dst,
756 const mlib_image *src,
757 const mlib_s32 *kern,
758 mlib_s32 scalef_expon,
759 mlib_s32 cmask)
760 {
761 mlib_d64 buff[(KSIZE + 2)*BUFF_LINE], *buffs[2*(KSIZE + 1)], *buffd;
762 mlib_d64 k[KSIZE*KSIZE];
763 mlib_d64 k0, k1, k2, k3, k4, k5, k6;
764 mlib_d64 p0, p1, p2, p3, p4, p5, p6, p7;
765 mlib_d64 d0, d1;
766 mlib_s32 l, m, buff_ind, *sl2, *sl3, *sl4, *sl5, *sl6;
767 mlib_d64 scalef;
768 DEF_VARS_MxN(mlib_s32);
769 mlib_s32 chan2 = chan1 + chan1;
770 mlib_s32 *sl1;
771
772 if (wid > BUFF_LINE) {
773 pbuff = mlib_malloc((KSIZE + 2)*sizeof(mlib_d64)*wid);
774
775 if (pbuff == NULL) return MLIB_FAILURE;
776 }
777
778 for (l = 0; l < KSIZE + 1; l++) buffs[l] = pbuff + l*wid;
779 for (l = 0; l < KSIZE + 1; l++) buffs[l + (KSIZE + 1)] = buffs[l];
780 buffd = buffs[KSIZE] + wid;
781
782 wid -= (KSIZE - 1);
783 hgt -= (KSIZE - 1);
784
785 adr_dst += ((KSIZE - 1)/2)*(dll + chan1);
786
787 CALC_SCALE();
788 for (j = 0; j < 49; j++) k[j] = scalef * kern[j];
789
790 for (c = 0; c < chan1; c++) {
791 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
792
793 sl = adr_src + c;
794 dl = adr_dst + c;
795
796 sl1 = sl + sll;
797 sl2 = sl1 + sll;
798 sl3 = sl2 + sll;
799 sl4 = sl3 + sll;
800 sl5 = sl4 + sll;
801 sl6 = sl5 + sll;
802 #ifdef __SUNPRO_C
803 #pragma pipeloop(0)
804 #endif /* __SUNPRO_C */
805 for (i = 0; i < wid + (KSIZE - 1); i++) {
806 buffs[0][i] = (mlib_d64)sl[i*chan1];
807 buffs[1][i] = (mlib_d64)sl1[i*chan1];
808 buffs[2][i] = (mlib_d64)sl2[i*chan1];
809 buffs[3][i] = (mlib_d64)sl3[i*chan1];
810 buffs[4][i] = (mlib_d64)sl4[i*chan1];
811 buffs[5][i] = (mlib_d64)sl5[i*chan1];
812 buffs[6][i] = (mlib_d64)sl6[i*chan1];
813 }
814
815 buff_ind = 0;
816
817 #ifdef __SUNPRO_C
818 #pragma pipeloop(0)
819 #endif /* __SUNPRO_C */
820 for (i = 0; i < wid; i++) buffd[i] = 0.0;
821
822 sl += KSIZE*sll;
823
824 for (j = 0; j < hgt; j++) {
825 mlib_d64 **buffc = buffs + buff_ind;
826 mlib_d64 *buffn = buffc[KSIZE];
827 mlib_d64 *pk = k;
828
829 for (l = 0; l < KSIZE; l++) {
830 mlib_d64 *buff = buffc[l];
831
832 sp = sl;
833 dp = dl;
834
835 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
836 p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
837
838 k0 = *pk++; k1 = *pk++; k2 = *pk++; k3 = *pk++;
839 k4 = *pk++; k5 = *pk++; k6 = *pk++;
840
841 if (l < (KSIZE - 1)) {
842 #ifdef __SUNPRO_C
843 #pragma pipeloop(0)
844 #endif /* __SUNPRO_C */
845 for (i = 0; i <= (wid - 2); i += 2) {
846 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
847
848 p6 = buff[i + 6]; p7 = buff[i + 7];
849
850 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
851 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
852 }
853
854 } else {
855 #ifdef __SUNPRO_C
856 #pragma pipeloop(0)
857 #endif /* __SUNPRO_C */
858 for (i = 0; i <= (wid - 2); i += 2) {
859 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
860
861 p6 = buff[i + 6]; p7 = buff[i + 7];
862
863 buffn[i ] = (mlib_d64)sp[0];
864 buffn[i + 1] = (mlib_d64)sp[chan1];
865
866 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ];
867 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1];
868
869 CLAMP_S32(dp[0 ], d0);
870 CLAMP_S32(dp[chan1], d1);
871
872 buffd[i ] = 0.0;
873 buffd[i + 1] = 0.0;
874
875 sp += chan2;
876 dp += chan2;
877 }
878 }
879 }
880
881 /* last pixels */
882 for (; i < wid; i++) {
883 mlib_d64 *pk = k, s = 0;
884
885 for (l = 0; l < KSIZE; l++) {
886 mlib_d64 *buff = buffc[l] + i;
887
888 for (m = 0; m < KSIZE; m++) s += buff[m] * (*pk++);
889 }
890
891 CLAMP_S32(dp[0], s);
892
893 buffn[i] = (mlib_d64)sp[0];
894
895 sp += chan1;
896 dp += chan1;
897 }
898
899 for (l = 0; l < (KSIZE - 1); l++) buffn[wid + l] = sp[l*chan1];
900
901 /* next line */
902 sl += sll;
903 dl += dll;
904
905 buff_ind++;
906
907 if (buff_ind >= KSIZE + 1) buff_ind = 0;
908 }
909 }
910
911 if (pbuff != buff) mlib_free(pbuff);
912
913 return MLIB_SUCCESS;
914 }
915
916 /***************************************************************/
917 #define FTYPE mlib_d64
918 #define DTYPE mlib_s32
919
920 #define BUFF_SIZE 1600
921
mlib_ImageConv1xN(mlib_image * dst,const mlib_image * src,const mlib_d64 * k,mlib_s32 n,mlib_s32 dn,mlib_s32 cmask)922 static mlib_status mlib_ImageConv1xN(mlib_image *dst,
923 const mlib_image *src,
924 const mlib_d64 *k,
925 mlib_s32 n,
926 mlib_s32 dn,
927 mlib_s32 cmask)
928 {
929 FTYPE buff[BUFF_SIZE];
930 mlib_s32 off, kh;
931 const FTYPE *pk;
932 FTYPE k0, k1, k2, k3, d0, d1;
933 FTYPE p0, p1, p2, p3, p4;
934 DTYPE *sl_c, *dl_c, *sl0;
935 mlib_s32 l, hsize, max_hsize;
936 DEF_VARS_MxN(DTYPE);
937
938 hgt -= (n - 1);
939 adr_dst += dn*dll;
940
941 max_hsize = (CACHE_SIZE/sizeof(DTYPE))/sll;
942
943 if (!max_hsize) max_hsize = 1;
944
945 if (max_hsize > BUFF_SIZE) {
946 pbuff = mlib_malloc(sizeof(FTYPE)*max_hsize);
947 }
948
949 sl_c = adr_src;
950 dl_c = adr_dst;
951
952 for (l = 0; l < hgt; l += hsize) {
953 hsize = hgt - l;
954
955 if (hsize > max_hsize) hsize = max_hsize;
956
957 for (c = 0; c < chan1; c++) {
958 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
959
960 sl = sl_c + c;
961 dl = dl_c + c;
962
963 #ifdef __SUNPRO_C
964 #pragma pipeloop(0)
965 #endif /* __SUNPRO_C */
966 for (j = 0; j < hsize; j++) pbuff[j] = 0.0;
967
968 for (i = 0; i < wid; i++) {
969 sl0 = sl;
970
971 for (off = 0; off < (n - 4); off += 4) {
972 pk = k + off;
973 sp = sl0;
974
975 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
976 p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
977 sp += 3*sll;
978
979 #ifdef __SUNPRO_C
980 #pragma pipeloop(0)
981 #endif /* __SUNPRO_C */
982 for (j = 0; j < hsize; j += 2) {
983 p0 = p2; p1 = p3; p2 = p4;
984 p3 = sp[0];
985 p4 = sp[sll];
986
987 pbuff[j ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
988 pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
989
990 sp += 2*sll;
991 }
992
993 sl0 += 4*sll;
994 }
995
996 pk = k + off;
997 sp = sl0;
998
999 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
1000 p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
1001
1002 dp = dl;
1003 kh = n - off;
1004
1005 if (kh == 4) {
1006 sp += 3*sll;
1007
1008 #ifdef __SUNPRO_C
1009 #pragma pipeloop(0)
1010 #endif /* __SUNPRO_C */
1011 for (j = 0; j <= (hsize - 2); j += 2) {
1012 p0 = p2; p1 = p3; p2 = p4;
1013 p3 = sp[0];
1014 p4 = sp[sll];
1015
1016 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j];
1017 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1];
1018 CLAMP_S32(dp[0 ], d0);
1019 CLAMP_S32(dp[dll], d1);
1020
1021 pbuff[j] = 0;
1022 pbuff[j + 1] = 0;
1023
1024 sp += 2*sll;
1025 dp += 2*dll;
1026 }
1027
1028 if (j < hsize) {
1029 p0 = p2; p1 = p3; p2 = p4;
1030 p3 = sp[0];
1031
1032 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j];
1033 CLAMP_S32(dp[0], d0);
1034
1035 pbuff[j] = 0;
1036 }
1037
1038 } else if (kh == 3) {
1039 sp += 2*sll;
1040
1041 #ifdef __SUNPRO_C
1042 #pragma pipeloop(0)
1043 #endif /* __SUNPRO_C */
1044 for (j = 0; j <= (hsize - 2); j += 2) {
1045 p0 = p2; p1 = p3;
1046 p2 = sp[0];
1047 p3 = sp[sll];
1048
1049 d0 = p0*k0 + p1*k1 + p2*k2 + pbuff[j];
1050 d1 = p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1];
1051 CLAMP_S32(dp[0 ], d0);
1052 CLAMP_S32(dp[dll], d1);
1053
1054 pbuff[j] = 0;
1055 pbuff[j + 1] = 0;
1056
1057 sp += 2*sll;
1058 dp += 2*dll;
1059 }
1060
1061 if (j < hsize) {
1062 p0 = p2; p1 = p3;
1063 p2 = sp[0];
1064
1065 d0 = p0*k0 + p1*k1 + p2*k2 + pbuff[j];
1066 CLAMP_S32(dp[0], d0);
1067
1068 pbuff[j] = 0;
1069 }
1070
1071 } else if (kh == 2) {
1072 sp += sll;
1073
1074 #ifdef __SUNPRO_C
1075 #pragma pipeloop(0)
1076 #endif /* __SUNPRO_C */
1077 for (j = 0; j <= (hsize - 2); j += 2) {
1078 p0 = p2;
1079 p1 = sp[0];
1080 p2 = sp[sll];
1081
1082 d0 = p0*k0 + p1*k1 + pbuff[j];
1083 d1 = p1*k0 + p2*k1 + pbuff[j + 1];
1084 CLAMP_S32(dp[0 ], d0);
1085 CLAMP_S32(dp[dll], d1);
1086
1087 pbuff[j] = 0;
1088 pbuff[j + 1] = 0;
1089
1090 sp += 2*sll;
1091 dp += 2*dll;
1092 }
1093
1094 if (j < hsize) {
1095 p0 = p2;
1096 p1 = sp[0];
1097
1098 d0 = p0*k0 + p1*k1 + pbuff[j];
1099 CLAMP_S32(dp[0], d0);
1100
1101 pbuff[j] = 0;
1102 }
1103
1104 } else /* if (kh == 1) */ {
1105 #ifdef __SUNPRO_C
1106 #pragma pipeloop(0)
1107 #endif /* __SUNPRO_C */
1108 for (j = 0; j < hsize; j++) {
1109 p0 = sp[0];
1110
1111 d0 = p0*k0 + pbuff[j];
1112 CLAMP_S32(dp[0], d0);
1113
1114 pbuff[j] = 0;
1115
1116 sp += sll;
1117 dp += dll;
1118 }
1119 }
1120
1121 sl += chan1;
1122 dl += chan1;
1123 }
1124 }
1125
1126 sl_c += max_hsize*sll;
1127 dl_c += max_hsize*dll;
1128 }
1129
1130 if (pbuff != buff) mlib_free(pbuff);
1131
1132 return MLIB_SUCCESS;
1133 }
1134
1135 /***************************************************************/
1136 #define MAX_KER 7
1137
1138 #define MAX_N 15
1139
1140 #undef BUFF_SIZE
1141 #define BUFF_SIZE 1500
1142
CONV_FUNC(MxN)1143 mlib_status CONV_FUNC(MxN)(mlib_image *dst,
1144 const mlib_image *src,
1145 const mlib_s32 *kernel,
1146 mlib_s32 m,
1147 mlib_s32 n,
1148 mlib_s32 dm,
1149 mlib_s32 dn,
1150 mlib_s32 scale,
1151 mlib_s32 cmask)
1152 {
1153 mlib_d64 buff[BUFF_SIZE], *buffs_arr[2*(MAX_N + 1)];
1154 mlib_d64 **buffs = buffs_arr, *buffd;
1155 mlib_d64 akernel[256], *k = akernel, fscale = 1.0;
1156 mlib_s32 l, off, kw, bsize, buff_ind, mn;
1157 mlib_d64 d0, d1;
1158 mlib_d64 k0, k1, k2, k3, k4, k5, k6;
1159 mlib_d64 p0, p1, p2, p3, p4, p5, p6, p7;
1160 DEF_VARS_MxN(mlib_s32);
1161 mlib_s32 chan2 = chan1 + chan1;
1162
1163 mlib_status status = MLIB_SUCCESS;
1164
1165 if (scale > 30) {
1166 fscale *= 1.0/(1 << 30);
1167 scale -= 30;
1168 }
1169
1170 fscale /= (1 << scale);
1171
1172 mn = m*n;
1173
1174 if (mn > 256) {
1175 k = mlib_malloc(mn*sizeof(mlib_d64));
1176
1177 if (k == NULL) return MLIB_FAILURE;
1178 }
1179
1180 for (i = 0; i < mn; i++) {
1181 k[i] = kernel[i]*fscale;
1182 }
1183
1184 if (m == 1) {
1185 status = mlib_ImageConv1xN(dst, src, k, n, dn, cmask);
1186 FREE_AND_RETURN_STATUS;
1187 }
1188
1189 bsize = (n + 2)*wid;
1190
1191 if ((bsize > BUFF_SIZE) || (n > MAX_N)) {
1192 pbuff = mlib_malloc(sizeof(mlib_d64)*bsize + sizeof(mlib_d64*)*2*(n + 1));
1193
1194 if (pbuff == NULL) {
1195 status = MLIB_FAILURE;
1196 FREE_AND_RETURN_STATUS;
1197 }
1198 buffs = (mlib_d64**)(pbuff + bsize);
1199 }
1200
1201 for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*wid;
1202 for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l];
1203 buffd = buffs[n] + wid;
1204
1205 wid -= (m - 1);
1206 hgt -= (n - 1);
1207 adr_dst += dn*dll + dm*chan1;
1208
1209 for (c = 0; c < chan1; c++) {
1210 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
1211
1212 sl = adr_src + c;
1213 dl = adr_dst + c;
1214
1215 for (l = 0; l < n; l++) {
1216 mlib_d64 *buff = buffs[l];
1217
1218 #ifdef __SUNPRO_C
1219 #pragma pipeloop(0)
1220 #endif /* __SUNPRO_C */
1221 for (i = 0; i < wid + (m - 1); i++) {
1222 buff[i] = (mlib_d64)sl[i*chan1];
1223 }
1224
1225 sl += sll;
1226 }
1227
1228 buff_ind = 0;
1229
1230 #ifdef __SUNPRO_C
1231 #pragma pipeloop(0)
1232 #endif /* __SUNPRO_C */
1233 for (i = 0; i < wid; i++) buffd[i] = 0.0;
1234
1235 for (j = 0; j < hgt; j++) {
1236 mlib_d64 **buffc = buffs + buff_ind;
1237 mlib_d64 *buffn = buffc[n];
1238 mlib_d64 *pk = k;
1239
1240 for (l = 0; l < n; l++) {
1241 mlib_d64 *buff_l = buffc[l];
1242
1243 for (off = 0; off < m;) {
1244 mlib_d64 *buff = buff_l + off;
1245
1246 kw = m - off;
1247
1248 if (kw > 2*MAX_KER) kw = MAX_KER; else
1249 if (kw > MAX_KER) kw = kw/2;
1250 off += kw;
1251
1252 sp = sl;
1253 dp = dl;
1254
1255 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
1256 p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
1257
1258 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
1259 k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
1260 pk += kw;
1261
1262 if (kw == 7) {
1263
1264 if (l < (n - 1) || off < m) {
1265 #ifdef __SUNPRO_C
1266 #pragma pipeloop(0)
1267 #endif /* __SUNPRO_C */
1268 for (i = 0; i <= (wid - 2); i += 2) {
1269 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
1270
1271 p6 = buff[i + 6]; p7 = buff[i + 7];
1272
1273 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
1274 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
1275 }
1276
1277 } else {
1278 #ifdef __SUNPRO_C
1279 #pragma pipeloop(0)
1280 #endif /* __SUNPRO_C */
1281 for (i = 0; i <= (wid - 2); i += 2) {
1282 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
1283
1284 p6 = buff[i + 6]; p7 = buff[i + 7];
1285
1286 buffn[i ] = (mlib_d64)sp[0];
1287 buffn[i + 1] = (mlib_d64)sp[chan1];
1288
1289 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ];
1290 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1];
1291
1292 CLAMP_S32(dp[0], d0);
1293 CLAMP_S32(dp[chan1], d1);
1294
1295 buffd[i ] = 0.0;
1296 buffd[i + 1] = 0.0;
1297
1298 sp += chan2;
1299 dp += chan2;
1300 }
1301 }
1302
1303 } else if (kw == 6) {
1304
1305 if (l < (n - 1) || off < m) {
1306 #ifdef __SUNPRO_C
1307 #pragma pipeloop(0)
1308 #endif /* __SUNPRO_C */
1309 for (i = 0; i <= (wid - 2); i += 2) {
1310 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1311
1312 p5 = buff[i + 5]; p6 = buff[i + 6];
1313
1314 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
1315 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
1316 }
1317
1318 } else {
1319 #ifdef __SUNPRO_C
1320 #pragma pipeloop(0)
1321 #endif /* __SUNPRO_C */
1322 for (i = 0; i <= (wid - 2); i += 2) {
1323 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1324
1325 p5 = buff[i + 5]; p6 = buff[i + 6];
1326
1327 buffn[i ] = (mlib_d64)sp[0];
1328 buffn[i + 1] = (mlib_d64)sp[chan1];
1329
1330 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ];
1331 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1];
1332
1333 CLAMP_S32(dp[0], d0);
1334 CLAMP_S32(dp[chan1], d1);
1335
1336 buffd[i ] = 0.0;
1337 buffd[i + 1] = 0.0;
1338
1339 sp += chan2;
1340 dp += chan2;
1341 }
1342 }
1343
1344 } else if (kw == 5) {
1345
1346 if (l < (n - 1) || off < m) {
1347 #ifdef __SUNPRO_C
1348 #pragma pipeloop(0)
1349 #endif /* __SUNPRO_C */
1350 for (i = 0; i <= (wid - 2); i += 2) {
1351 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1352
1353 p4 = buff[i + 4]; p5 = buff[i + 5];
1354
1355 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
1356 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
1357 }
1358
1359 } else {
1360 #ifdef __SUNPRO_C
1361 #pragma pipeloop(0)
1362 #endif /* __SUNPRO_C */
1363 for (i = 0; i <= (wid - 2); i += 2) {
1364 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1365
1366 p4 = buff[i + 4]; p5 = buff[i + 5];
1367
1368 buffn[i ] = (mlib_d64)sp[0];
1369 buffn[i + 1] = (mlib_d64)sp[chan1];
1370
1371 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ];
1372 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1];
1373
1374 CLAMP_S32(dp[0], d0);
1375 CLAMP_S32(dp[chan1], d1);
1376
1377 buffd[i ] = 0.0;
1378 buffd[i + 1] = 0.0;
1379
1380 sp += chan2;
1381 dp += chan2;
1382 }
1383 }
1384
1385 } else if (kw == 4) {
1386
1387 if (l < (n - 1) || off < m) {
1388 #ifdef __SUNPRO_C
1389 #pragma pipeloop(0)
1390 #endif /* __SUNPRO_C */
1391 for (i = 0; i <= (wid - 2); i += 2) {
1392 p0 = p2; p1 = p3; p2 = p4;
1393
1394 p3 = buff[i + 3]; p4 = buff[i + 4];
1395
1396 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1397 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1398 }
1399
1400 } else {
1401 #ifdef __SUNPRO_C
1402 #pragma pipeloop(0)
1403 #endif /* __SUNPRO_C */
1404 for (i = 0; i <= (wid - 2); i += 2) {
1405 p0 = p2; p1 = p3; p2 = p4;
1406
1407 p3 = buff[i + 3]; p4 = buff[i + 4];
1408
1409 buffn[i ] = (mlib_d64)sp[0];
1410 buffn[i + 1] = (mlib_d64)sp[chan1];
1411
1412 d0 = p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ];
1413 d1 = p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1];
1414
1415 CLAMP_S32(dp[0], d0);
1416 CLAMP_S32(dp[chan1], d1);
1417
1418 buffd[i ] = 0.0;
1419 buffd[i + 1] = 0.0;
1420
1421 sp += chan2;
1422 dp += chan2;
1423 }
1424 }
1425
1426 } else if (kw == 3) {
1427
1428 if (l < (n - 1) || off < m) {
1429 #ifdef __SUNPRO_C
1430 #pragma pipeloop(0)
1431 #endif /* __SUNPRO_C */
1432 for (i = 0; i <= (wid - 2); i += 2) {
1433 p0 = p2; p1 = p3;
1434
1435 p2 = buff[i + 2]; p3 = buff[i + 3];
1436
1437 buffd[i ] += p0*k0 + p1*k1 + p2*k2;
1438 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1439 }
1440
1441 } else {
1442 #ifdef __SUNPRO_C
1443 #pragma pipeloop(0)
1444 #endif /* __SUNPRO_C */
1445 for (i = 0; i <= (wid - 2); i += 2) {
1446 p0 = p2; p1 = p3;
1447
1448 p2 = buff[i + 2]; p3 = buff[i + 3];
1449
1450 buffn[i ] = (mlib_d64)sp[0];
1451 buffn[i + 1] = (mlib_d64)sp[chan1];
1452
1453 d0 = p0*k0 + p1*k1 + p2*k2 + buffd[i ];
1454 d1 = p1*k0 + p2*k1 + p3*k2 + buffd[i + 1];
1455
1456 CLAMP_S32(dp[0], d0);
1457 CLAMP_S32(dp[chan1], d1);
1458
1459 buffd[i ] = 0.0;
1460 buffd[i + 1] = 0.0;
1461
1462 sp += chan2;
1463 dp += chan2;
1464 }
1465 }
1466
1467 } else { /* kw == 2 */
1468
1469 if (l < (n - 1) || off < m) {
1470 #ifdef __SUNPRO_C
1471 #pragma pipeloop(0)
1472 #endif /* __SUNPRO_C */
1473 for (i = 0; i <= (wid - 2); i += 2) {
1474 p0 = p2;
1475
1476 p1 = buff[i + 1]; p2 = buff[i + 2];
1477
1478 buffd[i ] += p0*k0 + p1*k1;
1479 buffd[i + 1] += p1*k0 + p2*k1;
1480 }
1481
1482 } else {
1483 #ifdef __SUNPRO_C
1484 #pragma pipeloop(0)
1485 #endif /* __SUNPRO_C */
1486 for (i = 0; i <= (wid - 2); i += 2) {
1487 p0 = p2;
1488
1489 p1 = buff[i + 1]; p2 = buff[i + 2];
1490
1491 buffn[i ] = (mlib_d64)sp[0];
1492 buffn[i + 1] = (mlib_d64)sp[chan1];
1493
1494 d0 = p0*k0 + p1*k1 + buffd[i ];
1495 d1 = p1*k0 + p2*k1 + buffd[i + 1];
1496
1497 CLAMP_S32(dp[0], d0);
1498 CLAMP_S32(dp[chan1], d1);
1499
1500 buffd[i ] = 0.0;
1501 buffd[i + 1] = 0.0;
1502
1503 sp += chan2;
1504 dp += chan2;
1505 }
1506 }
1507 }
1508 }
1509 }
1510
1511 /* last pixels */
1512 for (; i < wid; i++) {
1513 mlib_d64 *pk = k, s = 0;
1514 mlib_s32 x;
1515
1516 for (l = 0; l < n; l++) {
1517 mlib_d64 *buff = buffc[l] + i;
1518
1519 for (x = 0; x < m; x++) s += buff[x] * (*pk++);
1520 }
1521
1522 CLAMP_S32(dp[0], s);
1523
1524 buffn[i] = (mlib_d64)sp[0];
1525
1526 sp += chan1;
1527 dp += chan1;
1528 }
1529
1530 for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1];
1531
1532 /* next line */
1533 sl += sll;
1534 dl += dll;
1535
1536 buff_ind++;
1537
1538 if (buff_ind >= n + 1) buff_ind = 0;
1539 }
1540 }
1541
1542 FREE_AND_RETURN_STATUS;
1543 }
1544
1545 /***************************************************************/
1546