1 /*
2  * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 
27 
28 /*
29  * FUNCTIONS
30  *      mlib_ImageConvCopyEdge  - Copy src edges  to dst edges
31  *
32  *
33  * SYNOPSIS
34  *      mlib_status mlib_ImageConvCopyEdge(mlib_image       *dst,
35  *                                         const mlib_image *src,
36  *                                         mlib_s32         dx_l,
37  *                                         mlib_s32         dx_r,
38  *                                         mlib_s32         dy_t,
39  *                                         mlib_s32         dy_b,
40  *                                         mlib_s32         cmask)
41  *
42  * ARGUMENT
43  *      dst       Pointer to the dst image.
44  *      src       Pointer to the src image.
45  *      dx_l      Number of columns on the left side of the
46  *                image to be copied.
47  *      dx_r      Number of columns on the right side of the
48  *                image to be copied.
49  *      dy_t      Number of rows on the top edge of the
50  *                image to be copied.
51  *      dy_b      Number of rows on the bottom edge of the
52  *                image to be copied.
53  *      cmask     Channel mask to indicate the channels to be convolved.
54  *                Each bit of which represents a channel in the image. The
55  *                channels corresponded to 1 bits are those to be processed.
56  *
57  * RESTRICTION
58  *      The src and the dst must be the same type, same width, same height and have same number
59  *      of channels (1, 2, 3, or 4). The unselected channels are not
60  *      overwritten. If both src and dst have just one channel,
61  *      cmask is ignored.
62  *
63  * DESCRIPTION
64  *      Copy src edges  to dst edges.
65 
66  *      The unselected channels are not overwritten.
67  *      If src and dst have just one channel,
68  *      cmask is ignored.
69  */
70 
71 #include "vis_proto.h"
72 #include "mlib_image.h"
73 #include "mlib_ImageConvEdge.h"
74 
75 /***************************************************************/
76 static void mlib_ImageConvCopyEdge_U8(mlib_image       *dst,
77                                       const mlib_image *src,
78                                       mlib_s32         dx_l,
79                                       mlib_s32         dx_r,
80                                       mlib_s32         dy_t,
81                                       mlib_s32         dy_b,
82                                       mlib_s32         cmask,
83                                       mlib_s32         nchan);
84 
85 static void mlib_ImageConvCopyEdge_U8_3(mlib_image       *dst,
86                                         const mlib_image *src,
87                                         mlib_s32         dx_l,
88                                         mlib_s32         dx_r,
89                                         mlib_s32         dy_t,
90                                         mlib_s32         dy_b,
91                                         mlib_s32         cmask);
92 
93 static void mlib_ImageConvCopyEdge_S16(mlib_image       *dst,
94                                        const mlib_image *src,
95                                        mlib_s32         dx_l,
96                                        mlib_s32         dx_r,
97                                        mlib_s32         dy_t,
98                                        mlib_s32         dy_b,
99                                        mlib_s32         cmask,
100                                        mlib_s32         nchan);
101 
102 static void mlib_ImageConvCopyEdge_S16_3(mlib_image       *dst,
103                                          const mlib_image *src,
104                                          mlib_s32         dx_l,
105                                          mlib_s32         dx_r,
106                                          mlib_s32         dy_t,
107                                          mlib_s32         dy_b,
108                                          mlib_s32         cmask);
109 
110 static void mlib_ImageConvCopyEdge_S32(mlib_image       *dst,
111                                        const mlib_image *src,
112                                        mlib_s32         dx_l,
113                                        mlib_s32         dx_r,
114                                        mlib_s32         dy_t,
115                                        mlib_s32         dy_b,
116                                        mlib_s32         cmask,
117                                        mlib_s32         nchan);
118 
119 static void mlib_ImageConvCopyEdge_S32_3(mlib_image       *dst,
120                                          const mlib_image *src,
121                                          mlib_s32         dx_l,
122                                          mlib_s32         dx_r,
123                                          mlib_s32         dy_t,
124                                          mlib_s32         dy_b,
125                                          mlib_s32         cmask);
126 
127 static void mlib_ImageConvCopyEdge_S32_4(mlib_image       *dst,
128                                          const mlib_image *src,
129                                          mlib_s32         dx_l,
130                                          mlib_s32         dx_r,
131                                          mlib_s32         dy_t,
132                                          mlib_s32         dy_b,
133                                          mlib_s32         cmask);
134 
135 /***************************************************************/
/*
 * VERT_EDGES(chan, type, mask)
 *
 * Meant to be expanded at the top of an edge-copy routine. It first declares
 * the routine's working locals (data pointers pdst/psrc, row pointers, image
 * width/height, strides expressed in elements of `type`, loop counters,
 * mask temporaries and mlib_d64 pointers/values), then copies the left and
 * right edge columns with plain element-by-element assignments.
 *
 *   chan  number of channels per pixel
 *   type  element type of the image data
 *   mask  channel mask; bit 0 selects channel chan-1, bit 1 selects channel
 *         chan-2, and so on (testchan starts at 1 while l starts at chan-1)
 *
 * The names dst, src, dx_l, dx_r, dy_t and dy_b must be in scope at the
 * point of expansion. For every selected channel, rows dy_t up to (but not
 * including) img_height - dy_b of the first dx_l and the last dx_r columns
 * are copied from src to dst.
 *
 * After that, if neither image has row padding (both strides equal
 * img_width * chan), the top band and the bottom band are each folded into
 * one long row: img_width_t and img_width_b are multiplied by dy_t and dy_b,
 * both strides are multiplied by (img_height - dy_b), img_height is set to 2
 * and dy_t/dy_b are reduced to 0 or 1. This overwrites the dy_t and dy_b of
 * the enclosing function.
 *
 * The macro arguments are substituted without parentheses, so only simple
 * expressions should be passed.
 */
136 #define VERT_EDGES(chan, type, mask)                             \
137   type *pdst = (type *) mlib_ImageGetData(dst);                  \
138   type *psrc = (type *) mlib_ImageGetData(src);                  \
139   type *pdst_row, *psrc_row, *pdst_row_end;                      \
140   mlib_s32 img_height = mlib_ImageGetHeight(dst);                \
141   mlib_s32 img_width  = mlib_ImageGetWidth(dst);                 \
142   mlib_s32 dst_stride = mlib_ImageGetStride(dst) / sizeof(type); \
143   mlib_s32 src_stride = mlib_ImageGetStride(src) / sizeof(type); \
144   mlib_s32 i, j, l;                                              \
145   mlib_s32 emask, testchan;                                      \
146   mlib_s32 img_width_t, img_width_b;                             \
147   mlib_d64 *dpdst, *dpsrc, data0, data1;                         \
148                                                                  \
149   testchan = 1;                                                  \
150   for (l = chan - 1; l >= 0; l--) {                              \
151     if ((mask & testchan) == 0) {                                \
152       testchan <<= 1;                                            \
153       continue;                                                  \
154     }                                                            \
155     testchan <<= 1;                                              \
156     for (j = 0; j < dx_l; j++) {                                 \
157       for (i = dy_t; i < (img_height - dy_b); i++) {             \
158         pdst[i*dst_stride + l + j*chan] =                        \
159           psrc[i*src_stride + l + j*chan];                       \
160       }                                                          \
161     }                                                            \
162     for (j = 0; j < dx_r; j++) {                                 \
163       for (i = dy_t; i < (img_height - dy_b); i++) {             \
164         pdst[i*dst_stride + l+(img_width-1 - j)*chan] =          \
165         psrc[i*src_stride + l+(img_width-1 - j)*chan];           \
166       }                                                          \
167     }                                                            \
168   }                                                              \
169   img_width_t = img_width;                                       \
170   img_width_b = img_width;                                       \
171   if (((img_width * chan) == dst_stride) &&                      \
172       ((img_width * chan) == src_stride)) {                      \
173     img_width_t *= dy_t;                                         \
174     img_width_b *= dy_b;                                         \
175     dst_stride *= (img_height - dy_b);                           \
176     src_stride *= (img_height - dy_b);                           \
177     img_height = 2;                                              \
178     dy_t = ((dy_t == 0) ? 0 : 1);                                \
179     dy_b = ((dy_b == 0) ? 0 : 1);                                \
180   }
181 
182 /***************************************************************/
/*
 * HORIZ_EDGES(chan, type, mask)
 *
 * Element-by-element copy of the top dy_t rows and the bottom dy_b rows,
 * one selected channel at a time. It works on the locals that VERT_EDGES
 * declares and initializes (pdst, psrc, dst_stride, src_stride, img_height,
 * img_width_t, img_width_b, i, j, l, testchan), so it can only be used
 * after VERT_EDGES in the same function. Channel selection uses the same
 * bit order as VERT_EDGES: bit 0 of mask corresponds to channel chan-1.
 *
 * The expansion is a braced block that ends with `return;`, i.e. it leaves
 * the function it is expanded in.
 */
183 #define HORIZ_EDGES(chan, type, mask) {                         \
184     testchan = 1;                                               \
185     for (l = chan - 1; l >= 0; l--) {                           \
186       if ((mask & testchan) == 0) {                             \
187         testchan <<= 1;                                         \
188         continue;                                               \
189       }                                                         \
190       testchan <<= 1;                                           \
191       for (i = 0; i < dy_t; i++) {                              \
192         for (j = 0; j < img_width_t; j++) {                     \
193           pdst[i*dst_stride + l + j*chan] =                     \
194             psrc[i*src_stride + l + j*chan];                    \
195         }                                                       \
196       }                                                         \
197       for (i = 0; i < dy_b; i++) {                              \
198         for (j = 0; j < img_width_b; j++) {                     \
199           pdst[(img_height-1 - i)*dst_stride + l + j*chan] =    \
200           psrc[(img_height-1 - i)*src_stride + l + j*chan];     \
201         }                                                       \
202       }                                                         \
203     }                                                           \
204     return;                                                     \
205   }
206 
207 /***************************************************************/
mlib_ImageConvCopyEdge(mlib_image * dst,const mlib_image * src,mlib_s32 dx_l,mlib_s32 dx_r,mlib_s32 dy_t,mlib_s32 dy_b,mlib_s32 cmask)208 mlib_status mlib_ImageConvCopyEdge(mlib_image       *dst,
209                                    const mlib_image *src,
210                                    mlib_s32         dx_l,
211                                    mlib_s32         dx_r,
212                                    mlib_s32         dy_t,
213                                    mlib_s32         dy_b,
214                                    mlib_s32         cmask)
215 {
216   mlib_s32 img_width = mlib_ImageGetWidth(dst);
217   mlib_s32 img_height = mlib_ImageGetHeight(dst);
218 
219   if (dx_l + dx_r > img_width) {
220     dx_l = img_width;
221     dx_r = 0;
222   }
223 
224   if (dy_t + dy_b > img_height) {
225     dy_t = img_height;
226     dy_b = 0;
227   }
228 
229   switch (mlib_ImageGetType(dst)) {
230     case MLIB_BIT:
231       return mlib_ImageConvCopyEdge_Bit(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
232 
233     case MLIB_BYTE:
234       switch (mlib_ImageGetChannels(dst)) {
235 
236         case 1:
237           mlib_ImageConvCopyEdge_U8(dst, src, dx_l, dx_r, dy_t, dy_b, 1, 1);
238           break;
239 
240         case 2:
241           mlib_ImageConvCopyEdge_U8(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 2);
242           break;
243 
244         case 3:
245           mlib_ImageConvCopyEdge_U8_3(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
246           break;
247 
248         case 4:
249           mlib_ImageConvCopyEdge_U8(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 4);
250           break;
251 
252         default:
253           return MLIB_FAILURE;
254       }
255 
256       break;
257 
258     case MLIB_SHORT:
259     case MLIB_USHORT:
260       switch (mlib_ImageGetChannels(dst)) {
261 
262         case 1:
263           mlib_ImageConvCopyEdge_S16(dst, src, dx_l, dx_r, dy_t, dy_b, 1, 1);
264           break;
265 
266         case 2:
267           mlib_ImageConvCopyEdge_S16(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 2);
268           break;
269 
270         case 3:
271           mlib_ImageConvCopyEdge_S16_3(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
272           break;
273 
274         case 4:
275           mlib_ImageConvCopyEdge_S16(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 4);
276           break;
277 
278         default:
279           return MLIB_FAILURE;
280       }
281 
282       break;
283 
284     case MLIB_INT:
285     case MLIB_FLOAT:
286       switch (mlib_ImageGetChannels(dst)) {
287 
288         case 1:
289           mlib_ImageConvCopyEdge_S32(dst, src, dx_l, dx_r, dy_t, dy_b, 1, 1);
290           break;
291 
292         case 2:
293           mlib_ImageConvCopyEdge_S32(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 2);
294           break;
295 
296         case 3:
297           mlib_ImageConvCopyEdge_S32_3(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
298           break;
299 
300         case 4:
301           mlib_ImageConvCopyEdge_S32_4(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
302           break;
303 
304         default:
305           return MLIB_FAILURE;
306       }
307 
308       break;
309 
310     case MLIB_DOUBLE:
311       return mlib_ImageConvCopyEdge_Fp(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
312 
313     default:
314       return MLIB_FAILURE;
315   }
316 
317   return MLIB_SUCCESS;
318 }
319 
320 /***************************************************************/
/*
 * Copy the edges of an image of mlib_u8 elements with nchan channels from
 * src to dst. The mask replication below handles nchan == 1, 2 and 4.
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * If the image is narrower than 16 / nchan pixels, HORIZ_EDGES copies the
 * top/bottom rows element by element and returns. Otherwise the top and
 * bottom rows are copied one aligned 8-byte word at a time.
 *
 * For nchan == 1 the word-wise row copy does not look at cmask (tmask is
 * forced to 0xFFFF); the column copy in VERT_EDGES still uses cmask.
 *
 * NOTE(review): the vis_* intrinsics come from vis_proto.h, which is not
 * visible here. The comments below assume the usual VIS meaning
 * (vis_alignaddr + vis_faligndata for unaligned reads, vis_edge8 for a
 * byte range mask, vis_pst_8 for a masked 8-byte store) - confirm there.
 */
mlib_ImageConvCopyEdge_U8(mlib_image * dst,const mlib_image * src,mlib_s32 dx_l,mlib_s32 dx_r,mlib_s32 dy_t,mlib_s32 dy_b,mlib_s32 cmask,mlib_s32 nchan)321 void mlib_ImageConvCopyEdge_U8(mlib_image       *dst,
322                                const mlib_image *src,
323                                mlib_s32         dx_l,
324                                mlib_s32         dx_r,
325                                mlib_s32         dy_t,
326                                mlib_s32         dy_b,
327                                mlib_s32         cmask,
328                                mlib_s32         nchan)
329 {
  /* tmask starts as the low nchan bits of cmask (one bit per channel). */
330   mlib_s32 tmask = cmask & ((1 << nchan) - 1), mask1, offset;
331   VERT_EDGES(nchan, mlib_u8, cmask);
332 
  /* Narrow image: scalar row copy; HORIZ_EDGES returns from the function. */
333   if (img_width < 16 / nchan)
334     HORIZ_EDGES(nchan, mlib_u8, cmask);
335 
  /* Repeat the per-pixel channel bits until tmask is 16 bits wide, so that
     shifting it right by the destination offset (0..7) still leaves a full
     8-bit store mask. */
336   if (nchan == 1)
337     tmask = 0xFFFF;
338   else if (nchan == 2) {
339     tmask |= (tmask << 2);
340     tmask |= (tmask << 4);
341     tmask |= (tmask << 8);
342   }
343   else if (nchan == 4) {
344     tmask |= (tmask << 4);
345     tmask |= (tmask << 8);
346   }
347 
  /* Top edge: rows 0 .. dy_t-1, img_width_t pixels per row. */
348   for (i = 0; i < dy_t; i++) {
349     pdst_row = pdst + i * dst_stride,
350       psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * nchan - 1;
    /* Destination rounded down to an 8-byte boundary; offset is the number
       of bytes between that boundary and the row start. */
351     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
352     offset = pdst_row - (mlib_u8 *) dpdst;
    /* Source address moved back by the same number of bytes. */
353     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -offset);
354     mask1 = (tmask >> offset);
355     data0 = *dpsrc++;
356     data1 = *dpsrc++;
    /* First word: limited to the row range and to the selected channels. */
357     emask = vis_edge8(pdst_row, pdst_row_end) & mask1;
358     vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
    /* j = number of row bytes covered so far. */
359     j = (mlib_s32) ((mlib_u8 *) dpdst - pdst_row);
360     data0 = data1;
    /* Whole words strictly before the last one. */
361     for (; j < (img_width_t * nchan - 8); j += 8) {
362       data1 = *dpsrc++;
363       vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask1);
364       data0 = data1;
365     }
366 
    /* Last word, clipped at pdst_row_end. */
367     data1 = *dpsrc++;
368     emask = vis_edge8(dpdst, pdst_row_end) & mask1;
369     vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
370   }
371 
  /* Bottom edge: rows img_height-1 down to img_height-dy_b, img_width_b
     pixels per row; same word-wise scheme as the top edge. */
372   for (i = 0; i < dy_b; i++) {
373     pdst_row = pdst + (img_height - 1 - i) * dst_stride;
374     psrc_row = psrc + (img_height - 1 - i) * src_stride;
375     pdst_row_end = pdst_row + img_width_b * nchan - 1;
376     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
377     offset = pdst_row - (mlib_u8 *) dpdst;
378     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -offset);
379     mask1 = (tmask >> offset);
380     data0 = *dpsrc++;
381     data1 = *dpsrc++;
382     emask = vis_edge8(pdst_row, pdst_row_end) & mask1;
383     vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
384     j = (mlib_s32) ((mlib_u8 *) dpdst - pdst_row);
385     data0 = data1;
386     for (; j < (img_width_b * nchan - 8); j += 8) {
387       data1 = *dpsrc++;
388       vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask1);
389       data0 = data1;
390     }
391 
392     data1 = *dpsrc++;
393     emask = vis_edge8(dpdst, pdst_row_end) & mask1;
394     vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
395   }
396 }
397 
398 /***************************************************************/
/*
 * Copy the edges of a 3-channel image of mlib_u8 elements from src to dst.
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * Images narrower than 16 pixels take the scalar HORIZ_EDGES path, which
 * returns. Otherwise the top and bottom rows are copied one aligned 8-byte
 * word at a time.
 *
 * Because 3 channels do not divide an 8-byte word evenly, three store masks
 * (mask0, mask1, mask2) are derived from the replicated channel mask and
 * used in rotation over a 24-byte (three-word) cycle.
 *
 * NOTE(review): the vis_* intrinsics come from vis_proto.h, which is not
 * visible here. The comments below assume the usual VIS meaning
 * (vis_alignaddr + vis_faligndata for unaligned reads, vis_edge8 for a
 * byte range mask, vis_pst_8 for a masked 8-byte store) - confirm there.
 */
mlib_ImageConvCopyEdge_U8_3(mlib_image * dst,const mlib_image * src,mlib_s32 dx_l,mlib_s32 dx_r,mlib_s32 dy_t,mlib_s32 dy_b,mlib_s32 cmask)399 void mlib_ImageConvCopyEdge_U8_3(mlib_image       *dst,
400                                  const mlib_image *src,
401                                  mlib_s32         dx_l,
402                                  mlib_s32         dx_r,
403                                  mlib_s32         dy_t,
404                                  mlib_s32         dy_b,
405                                  mlib_s32         cmask)
406 {
  /* tmask starts as the low 3 bits of cmask (one bit per channel). */
407   mlib_s32 tmask = cmask & 7, mask0, mask1, mask2, offset;
408 
409   VERT_EDGES(3, mlib_u8, cmask);
410 
  /* Narrow image: scalar row copy; HORIZ_EDGES returns from the function. */
411   if (img_width < 16)
412     HORIZ_EDGES(3, mlib_u8, cmask);
413 
  /* Repeat the 3-bit channel pattern to 24 bits (3 -> 6 -> 12 -> 24). */
414   tmask |= (tmask << 3);
415   tmask |= (tmask << 6);
416   tmask |= (tmask << 12);
  /* Top edge: rows 0 .. dy_t-1, img_width_t pixels per row. */
417   for (i = 0; i < dy_t; i++) {
418     pdst_row = pdst + i * dst_stride,
419       psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * 3 - 1;
    /* Destination rounded down to an 8-byte boundary; offset is the number
       of bytes between that boundary and the row start. */
420     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
421     offset = pdst_row - (mlib_u8 *) dpdst;
422     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -offset);
    /* mask2 is used for the first word; mask0 and mask1 (each shifted one
       more bit) are used for the words that follow it. */
423     mask2 = (tmask >> (offset + 1));
424     mask0 = mask2 >> 1;
425     mask1 = mask0 >> 1;
426     data0 = *dpsrc++;
427     data1 = *dpsrc++;
    /* First word: limited to the row range and to the selected channels. */
428     emask = vis_edge8(pdst_row, pdst_row_end) & mask2;
429     vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
430     data0 = data1;
    /* j = number of row bytes covered so far. */
431     j = (mlib_s32) ((mlib_u8 *) dpdst - pdst_row);
    /* Three words (24 bytes) per iteration, masks mask0, mask1, mask2. */
432     for (; j < (img_width_t * 3 - 24); j += 24) {
433       data1 = *dpsrc++;
434       vis_pst_8(vis_faligndata(data0, data1), dpdst, mask0);
435       data0 = data1;
436       data1 = *dpsrc++;
437       vis_pst_8(vis_faligndata(data0, data1), dpdst + 1, mask1);
438       data0 = data1;
439       data1 = *dpsrc++;
440       vis_pst_8(vis_faligndata(data0, data1), dpdst + 2, mask2);
441       data0 = data1;
442       dpdst += 3;
443     }
444 
    /* Up to two more whole words; j is not advanced here, so both tests
       compare against the value left by the loop. mask0 is then set to the
       mask that continues the rotation for the final word. */
445     if (j < (img_width_t * 3 - 8)) {
446       data1 = *dpsrc++;
447       vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask0);
448       data0 = data1;
449 
450       if (j < (img_width_t * 3 - 16)) {
451         data1 = *dpsrc++;
452         vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask1);
453         data0 = data1;
454         mask0 = mask2;
455       }
456       else {
457         mask0 = mask1;
458       }
459     }
460 
    /* Last word, clipped at pdst_row_end. */
461     data1 = *dpsrc++;
462     emask = vis_edge8(dpdst, pdst_row_end) & mask0;
463     vis_pst_8(vis_faligndata(data0, data1), dpdst, emask);
464   }
465 
  /* Bottom edge: rows img_height-1 down to img_height-dy_b, img_width_b
     pixels per row; same scheme as the top edge. */
466   for (i = 0; i < dy_b; i++) {
467     pdst_row = pdst + (img_height - 1 - i) * dst_stride;
468     psrc_row = psrc + (img_height - 1 - i) * src_stride;
469     pdst_row_end = pdst_row + img_width_b * 3 - 1;
470 
471     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
472     offset = pdst_row - (mlib_u8 *) dpdst;
473     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -offset);
474     mask2 = (tmask >> (offset + 1));
475     mask0 = mask2 >> 1;
476     mask1 = mask0 >> 1;
477     data0 = *dpsrc++;
478     data1 = *dpsrc++;
479     emask = vis_edge8(pdst_row, pdst_row_end) & mask2;
480     vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
481     data0 = data1;
482     j = (mlib_s32) ((mlib_u8 *) dpdst - pdst_row);
483     for (; j < (img_width_b * 3 - 24); j += 24) {
484       data1 = *dpsrc++;
485       vis_pst_8(vis_faligndata(data0, data1), dpdst, mask0);
486       data0 = data1;
487       data1 = *dpsrc++;
488       vis_pst_8(vis_faligndata(data0, data1), dpdst + 1, mask1);
489       data0 = data1;
490       data1 = *dpsrc++;
491       vis_pst_8(vis_faligndata(data0, data1), dpdst + 2, mask2);
492       data0 = data1;
493       dpdst += 3;
494     }
495 
496     if (j < (img_width_b * 3 - 8)) {
497       data1 = *dpsrc++;
498       vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask0);
499       data0 = data1;
500 
501       if (j < (img_width_b * 3 - 16)) {
502         data1 = *dpsrc++;
503         vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask1);
504         data0 = data1;
505         mask0 = mask2;
506       }
507       else {
508         mask0 = mask1;
509       }
510     }
511 
512     data1 = *dpsrc++;
513     emask = vis_edge8(dpdst, pdst_row_end) & mask0;
514     vis_pst_8(vis_faligndata(data0, data1), dpdst, emask);
515   }
516 }
517 
518 /***************************************************************/
/*
 * Copy the edges of an image of mlib_s16 elements with nchan channels from
 * src to dst. The mask replication below handles nchan == 1, 2 and 4.
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * If the image is narrower than 16 / nchan pixels, HORIZ_EDGES copies the
 * top/bottom rows element by element and returns. Otherwise the top and
 * bottom rows are copied one aligned 8-byte word (four 16-bit elements) at
 * a time.
 *
 * For nchan == 1 the word-wise row copy does not look at cmask (tmask is
 * forced to 0xFFFF); the column copy in VERT_EDGES still uses cmask.
 *
 * NOTE(review): the vis_* intrinsics come from vis_proto.h, which is not
 * visible here. The comments below assume the usual VIS meaning
 * (vis_alignaddr + vis_faligndata for unaligned reads, vis_edge16 for an
 * element range mask, vis_pst_16 for a masked store) - confirm there.
 */
mlib_ImageConvCopyEdge_S16(mlib_image * dst,const mlib_image * src,mlib_s32 dx_l,mlib_s32 dx_r,mlib_s32 dy_t,mlib_s32 dy_b,mlib_s32 cmask,mlib_s32 nchan)519 void mlib_ImageConvCopyEdge_S16(mlib_image       *dst,
520                                 const mlib_image *src,
521                                 mlib_s32         dx_l,
522                                 mlib_s32         dx_r,
523                                 mlib_s32         dy_t,
524                                 mlib_s32         dy_b,
525                                 mlib_s32         cmask,
526                                 mlib_s32         nchan)
527 {
  /* tmask starts as the low nchan bits of cmask (one bit per channel). */
528   mlib_s32 tmask = cmask & ((1 << nchan) - 1), mask1, offset;
529   VERT_EDGES(nchan, mlib_s16, cmask);
530 
  /* Narrow image: scalar row copy; HORIZ_EDGES returns from the function. */
531   if (img_width < 16 / nchan)
532     HORIZ_EDGES(nchan, mlib_s16, cmask);
533 
  /* Repeat the per-pixel channel bits to 8 bits for nchan 2 and 4, so that
     shifting right by the destination offset (0..3 elements) still leaves
     a full 4-bit store mask. */
534   if (nchan == 1)
535     tmask = 0xFFFF;
536   else if (nchan == 2) {
537     tmask |= (tmask << 2);
538     tmask |= (tmask << 4);
539   }
540   else if (nchan == 4)
541     tmask |= (tmask << 4);
542 
  /* Top edge: rows 0 .. dy_t-1, img_width_t pixels per row. */
543   for (i = 0; i < dy_t; i++) {
544     pdst_row = pdst + i * dst_stride,
545       psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * nchan - 1;
    /* Destination rounded down to an 8-byte boundary; offset is counted in
       16-bit elements, hence the << 1 to get bytes for the source side. */
546     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
547     offset = pdst_row - (mlib_s16 *) dpdst;
548     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 1));
549     mask1 = (tmask >> offset);
550     data0 = *dpsrc++;
551     data1 = *dpsrc++;
    /* First word: limited to the row range and to the selected channels. */
552     emask = vis_edge16(pdst_row, pdst_row_end) & mask1;
553     vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
    /* j = number of row elements covered so far. */
554     j = (mlib_s32) ((mlib_s16 *) dpdst - pdst_row);
555     data0 = data1;
    /* Whole words strictly before the last one. */
556     for (; j < (img_width_t * nchan - 4); j += 4) {
557       data1 = *dpsrc++;
558       vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask1);
559       data0 = data1;
560     }
561 
    /* Last word, clipped at pdst_row_end. */
562     data1 = *dpsrc++;
563     emask = vis_edge16(dpdst, pdst_row_end) & mask1;
564     vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
565   }
566 
  /* Bottom edge: rows img_height-1 down to img_height-dy_b, img_width_b
     pixels per row; same word-wise scheme as the top edge. */
567   for (i = 0; i < dy_b; i++) {
568     pdst_row = pdst + (img_height - 1 - i) * dst_stride;
569     psrc_row = psrc + (img_height - 1 - i) * src_stride;
570     pdst_row_end = pdst_row + img_width_b * nchan - 1;
571     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
572     offset = pdst_row - (mlib_s16 *) dpdst;
573     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 1));
574     mask1 = (tmask >> offset);
575     data0 = *dpsrc++;
576     data1 = *dpsrc++;
577     emask = vis_edge16(pdst_row, pdst_row_end) & mask1;
578     vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
579     j = (mlib_s32) ((mlib_s16 *) dpdst - pdst_row);
580     data0 = data1;
581     for (; j < (img_width_b * nchan - 4); j += 4) {
582       data1 = *dpsrc++;
583       vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask1);
584       data0 = data1;
585     }
586 
587     data1 = *dpsrc++;
588     emask = vis_edge16(dpdst, pdst_row_end) & mask1;
589     vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
590   }
591 }
592 
593 /***************************************************************/
/*
 * Copy the edges of a 3-channel image of mlib_s16 elements from src to dst.
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * Images narrower than 16 pixels take the scalar HORIZ_EDGES path, which
 * returns. Otherwise the top and bottom rows are copied one aligned 8-byte
 * word (four 16-bit elements) at a time.
 *
 * Because 3 channels do not divide a four-element word evenly, three store
 * masks (mask0, mask1, mask2) are derived from the replicated channel mask
 * and used in rotation over a 12-element (three-word) cycle.
 *
 * NOTE(review): the vis_* intrinsics come from vis_proto.h, which is not
 * visible here. The comments below assume the usual VIS meaning
 * (vis_alignaddr + vis_faligndata for unaligned reads, vis_edge16 for an
 * element range mask, vis_pst_16 for a masked store) - confirm there.
 */
mlib_ImageConvCopyEdge_S16_3(mlib_image * dst,const mlib_image * src,mlib_s32 dx_l,mlib_s32 dx_r,mlib_s32 dy_t,mlib_s32 dy_b,mlib_s32 cmask)594 void mlib_ImageConvCopyEdge_S16_3(mlib_image       *dst,
595                                   const mlib_image *src,
596                                   mlib_s32         dx_l,
597                                   mlib_s32         dx_r,
598                                   mlib_s32         dy_t,
599                                   mlib_s32         dy_b,
600                                   mlib_s32         cmask)
601 {
  /* tmask starts as the low 3 bits of cmask (one bit per channel). */
602   mlib_s32 tmask = cmask & 7, mask0, mask1, mask2, offset;
603 
604   VERT_EDGES(3, mlib_s16, cmask);
605 
  /* Narrow image: scalar row copy; HORIZ_EDGES returns from the function. */
606   if (img_width < 16)
607     HORIZ_EDGES(3, mlib_s16, cmask);
608 
  /* Repeat the 3-bit channel pattern to 24 bits (3 -> 6 -> 12 -> 24). */
609   tmask |= (tmask << 3);
610   tmask |= (tmask << 6);
611   tmask |= (tmask << 12);
  /* Top edge: rows 0 .. dy_t-1, img_width_t pixels per row. */
612   for (i = 0; i < dy_t; i++) {
613     pdst_row = pdst + i * dst_stride,
614       psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * 3 - 1;
    /* Destination rounded down to an 8-byte boundary; offset is counted in
       16-bit elements, hence the << 1 to get bytes for the source side. */
615     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
616     offset = pdst_row - (mlib_s16 *) dpdst;
617     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 1));
    /* mask2 is used for the first word; mask0 and mask1 (each shifted two
       more bits) are used for the words that follow it. */
618     mask2 = (tmask >> (offset + 2));
619     mask0 = mask2 >> 2;
620     mask1 = mask0 >> 2;
621     data0 = *dpsrc++;
622     data1 = *dpsrc++;
    /* First word: limited to the row range and to the selected channels. */
623     emask = vis_edge16(pdst_row, pdst_row_end) & mask2;
624     vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
625     data0 = data1;
    /* j = number of row elements covered so far. */
626     j = (mlib_s32) ((mlib_s16 *) dpdst - pdst_row);
    /* Three words (12 elements) per iteration, masks mask0, mask1, mask2. */
627     for (; j < (img_width_t * 3 - 12); j += 12) {
628       data1 = *dpsrc++;
629       vis_pst_16(vis_faligndata(data0, data1), dpdst, mask0);
630       data0 = data1;
631       data1 = *dpsrc++;
632       vis_pst_16(vis_faligndata(data0, data1), dpdst + 1, mask1);
633       data0 = data1;
634       data1 = *dpsrc++;
635       vis_pst_16(vis_faligndata(data0, data1), dpdst + 2, mask2);
636       data0 = data1;
637       dpdst += 3;
638     }
639 
    /* Up to two more whole words; j is not advanced here, so both tests
       compare against the value left by the loop. mask0 is then set to the
       mask that continues the rotation for the final word. */
640     if (j < (img_width_t * 3 - 4)) {
641       data1 = *dpsrc++;
642       vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask0);
643       data0 = data1;
644 
645       if (j < (img_width_t * 3 - 8)) {
646         data1 = *dpsrc++;
647         vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask1);
648         data0 = data1;
649         mask0 = mask2;
650       }
651       else {
652         mask0 = mask1;
653       }
654     }
655 
    /* Last word, clipped at pdst_row_end. */
656     data1 = *dpsrc++;
657     emask = vis_edge16(dpdst, pdst_row_end) & mask0;
658     vis_pst_16(vis_faligndata(data0, data1), dpdst, emask);
659   }
660 
  /* Bottom edge: rows img_height-1 down to img_height-dy_b, img_width_b
     pixels per row; same scheme as the top edge. */
661   for (i = 0; i < dy_b; i++) {
662     pdst_row = pdst + (img_height - 1 - i) * dst_stride;
663     psrc_row = psrc + (img_height - 1 - i) * src_stride;
664     pdst_row_end = pdst_row + img_width_b * 3 - 1;
665 
666     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
667     offset = pdst_row - (mlib_s16 *) dpdst;
668     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 1));
669     mask2 = (tmask >> (offset + 2));
670     mask0 = mask2 >> 2;
671     mask1 = mask0 >> 2;
672     data0 = *dpsrc++;
673     data1 = *dpsrc++;
674     emask = vis_edge16(pdst_row, pdst_row_end) & mask2;
675     vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
676     data0 = data1;
677     j = (mlib_s32) ((mlib_s16 *) dpdst - pdst_row);
678     for (; j < (img_width_b * 3 - 12); j += 12) {
679       data1 = *dpsrc++;
680       vis_pst_16(vis_faligndata(data0, data1), dpdst, mask0);
681       data0 = data1;
682       data1 = *dpsrc++;
683       vis_pst_16(vis_faligndata(data0, data1), dpdst + 1, mask1);
684       data0 = data1;
685       data1 = *dpsrc++;
686       vis_pst_16(vis_faligndata(data0, data1), dpdst + 2, mask2);
687       data0 = data1;
688       dpdst += 3;
689     }
690 
691     if (j < (img_width_b * 3 - 4)) {
692       data1 = *dpsrc++;
693       vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask0);
694       data0 = data1;
695 
696       if (j < (img_width_b * 3 - 8)) {
697         data1 = *dpsrc++;
698         vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask1);
699         data0 = data1;
700         mask0 = mask2;
701       }
702       else {
703         mask0 = mask1;
704       }
705     }
706 
707     data1 = *dpsrc++;
708     emask = vis_edge16(dpdst, pdst_row_end) & mask0;
709     vis_pst_16(vis_faligndata(data0, data1), dpdst, emask);
710   }
711 }
712 
713 /***************************************************************/
/*
 * Copy the edges of an image of mlib_s32 elements with nchan channels from
 * src to dst. The mask replication below handles nchan == 1 and 2 only.
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * If the image is narrower than 16 / nchan pixels, HORIZ_EDGES copies the
 * top/bottom rows element by element and returns. Otherwise the top and
 * bottom rows are copied one aligned 8-byte word (two 32-bit elements) at
 * a time.
 *
 * For nchan == 1 the word-wise row copy does not look at cmask (tmask is
 * forced to 0xFFFF); the column copy in VERT_EDGES still uses cmask.
 *
 * NOTE(review): the vis_* intrinsics come from vis_proto.h, which is not
 * visible here. The comments below assume the usual VIS meaning
 * (vis_alignaddr + vis_faligndata for unaligned reads, vis_edge32 for an
 * element range mask, vis_pst_32 for a masked store) - confirm there.
 */
mlib_ImageConvCopyEdge_S32(mlib_image * dst,const mlib_image * src,mlib_s32 dx_l,mlib_s32 dx_r,mlib_s32 dy_t,mlib_s32 dy_b,mlib_s32 cmask,mlib_s32 nchan)714 void mlib_ImageConvCopyEdge_S32(mlib_image       *dst,
715                                 const mlib_image *src,
716                                 mlib_s32         dx_l,
717                                 mlib_s32         dx_r,
718                                 mlib_s32         dy_t,
719                                 mlib_s32         dy_b,
720                                 mlib_s32         cmask,
721                                 mlib_s32         nchan)
722 {
  /* tmask starts as the low nchan bits of cmask (one bit per channel). */
723   mlib_s32 tmask = cmask & ((1 << nchan) - 1), mask1, offset;
724   VERT_EDGES(nchan, mlib_s32, cmask);
725 
  /* Narrow image: scalar row copy; HORIZ_EDGES returns from the function. */
726   if (img_width < 16 / nchan)
727     HORIZ_EDGES(nchan, mlib_s32, cmask);
728 
  /* For two channels repeat the 2-bit pattern to 8 bits, so that shifting
     right by the destination offset (0 or 1 element) still leaves a full
     2-bit store mask. */
729   if (nchan == 1)
730     tmask = 0xFFFF;
731   else if (nchan == 2) {
732     tmask |= (tmask << 2);
733     tmask |= (tmask << 4);
734   }
735 
  /* Top edge: rows 0 .. dy_t-1, img_width_t pixels per row. */
736   for (i = 0; i < dy_t; i++) {
737     pdst_row = pdst + i * dst_stride,
738       psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * nchan - 1;
    /* Destination rounded down to an 8-byte boundary; offset is counted in
       32-bit elements, hence the << 2 to get bytes for the source side. */
739     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
740     offset = pdst_row - (mlib_s32 *) dpdst;
741     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 2));
742     mask1 = (tmask >> offset);
743     data0 = *dpsrc++;
744     data1 = *dpsrc++;
    /* First word: limited to the row range and to the selected channels. */
745     emask = vis_edge32(pdst_row, pdst_row_end) & mask1;
746     vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
    /* j = number of row elements covered so far. */
747     j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);
748     data0 = data1;
    /* Whole words strictly before the last one. */
749     for (; j < (img_width_t * nchan - 2); j += 2) {
750       data1 = *dpsrc++;
751       vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask1);
752       data0 = data1;
753     }
754 
    /* Last word, clipped at pdst_row_end. */
755     data1 = *dpsrc++;
756     emask = vis_edge32(dpdst, pdst_row_end) & mask1;
757     vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
758   }
759 
  /* Bottom edge: rows img_height-1 down to img_height-dy_b, img_width_b
     pixels per row; same word-wise scheme as the top edge. */
760   for (i = 0; i < dy_b; i++) {
761     pdst_row = pdst + (img_height - 1 - i) * dst_stride;
762     psrc_row = psrc + (img_height - 1 - i) * src_stride;
763     pdst_row_end = pdst_row + img_width_b * nchan - 1;
764     dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
765     offset = pdst_row - (mlib_s32 *) dpdst;
766     dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 2));
767     mask1 = (tmask >> offset);
768     data0 = *dpsrc++;
769     data1 = *dpsrc++;
770     emask = vis_edge32(pdst_row, pdst_row_end) & mask1;
771     vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
772     j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);
773     data0 = data1;
774     for (; j < (img_width_b * nchan - 2); j += 2) {
775       data1 = *dpsrc++;
776       vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask1);
777       data0 = data1;
778     }
779 
780     data1 = *dpsrc++;
781     emask = vis_edge32(dpdst, pdst_row_end) & mask1;
782     vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
783   }
784 }
785 
786 /***************************************************************/
/*
 * mlib_ImageConvCopyEdge_S32_3 - copy the top and bottom edge rows of a
 * 3-channel S32 image from src to dst.
 *
 *   dst, src    destination / source images
 *   dx_l, dx_r  not referenced directly in this body; they are visible to
 *               the VERT_EDGES macro (defined elsewhere in this file)
 *   dy_t, dy_b  number of rows processed at the top / bottom
 *   cmask       channel mask (only the low three bits are used here)
 *
 * VERT_EDGES supplies the working locals used below (pdst, psrc, strides,
 * img_width_t, img_width_b, i, j, emask, dpdst, dpsrc, data0, data1, ...).
 * NOTE(review): HORIZ_EDGES looks like the fallback for images narrower
 * than 16 pixels and is presumably expected to leave the function - confirm
 * against its definition.
 *
 * An 8-byte word holds two S32 elements, so with three channels the lane
 * mask repeats every three words. Three shifted copies of the replicated
 * channel pattern are therefore kept, one per position in that cycle.
 */
void mlib_ImageConvCopyEdge_S32_3(mlib_image       *dst,
                                  const mlib_image *src,
                                  mlib_s32         dx_l,
                                  mlib_s32         dx_r,
                                  mlib_s32         dy_t,
                                  mlib_s32         dy_b,
                                  mlib_s32         cmask)
{
  mlib_s32 chan_bits = cmask & 7;
  mlib_s32 phase0_mask, phase1_mask, phase2_mask, tail_mask, align_off;
  mlib_s32 edge_pass, edge_rows, edge_row, edge_len;

  VERT_EDGES(3, mlib_s32, cmask);

  if (img_width < 16)
    HORIZ_EDGES(3, mlib_s32, cmask);

  /* Repeat the 3-bit channel pattern eight times. */
  chan_bits |= (chan_bits << 3);
  chan_bits |= (chan_bits << 6);
  chan_bits |= (chan_bits << 12);

  /* Pass 0 walks the top rows, pass 1 the bottom rows (counted from the
   * last row upwards). */
  for (edge_pass = 0; edge_pass < 2; edge_pass++) {
    if (edge_pass == 0) {
      edge_rows = dy_t;
      edge_len = img_width_t * 3;
    }
    else {
      edge_rows = dy_b;
      edge_len = img_width_b * 3;
    }

    for (i = 0; i < edge_rows; i++) {
      edge_row = (edge_pass == 0) ? i : (img_height - 1 - i);
      pdst_row = pdst + edge_row * dst_stride;
      psrc_row = psrc + edge_row * src_stride;
      pdst_row_end = pdst_row + edge_len - 1;

      /* Round the destination down to an 8-byte boundary and shift the
       * source pointer by the same number of bytes. */
      dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
      align_off = pdst_row - (mlib_s32 *) dpdst;
      dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(align_off << 2));

      /* Masks for the three word positions of the repeating cycle. */
      phase2_mask = (chan_bits >> (align_off + 1));
      phase0_mask = phase2_mask >> 1;
      phase1_mask = phase0_mask >> 1;

      /* Leading word: restricted to the lanes inside the row. */
      data0 = *dpsrc++;
      data1 = *dpsrc++;
      emask = vis_edge32(pdst_row, pdst_row_end) & phase2_mask;
      vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
      data0 = data1;
      j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);

      /* Interior: three words (six S32 elements) per iteration. Each load
       * is still issued immediately before the store that needs it. */
      while (j < (edge_len - 6)) {
        data1 = dpsrc[0];
        vis_pst_32(vis_faligndata(data0, data1), dpdst, phase0_mask);
        data0 = dpsrc[1];
        vis_pst_32(vis_faligndata(data1, data0), dpdst + 1, phase1_mask);
        data1 = dpsrc[2];
        vis_pst_32(vis_faligndata(data0, data1), dpdst + 2, phase2_mask);
        data0 = data1;
        dpsrc += 3;
        dpdst += 3;
        j += 6;
      }

      /* Up to two more full words; tail_mask tracks which cycle position
       * the final (edge-limited) word falls on. */
      tail_mask = phase0_mask;

      if (j < (edge_len - 2)) {
        data1 = *dpsrc++;
        vis_pst_32(vis_faligndata(data0, data1), dpdst++, phase0_mask);
        data0 = data1;
        tail_mask = phase1_mask;

        if (j < (edge_len - 4)) {
          data1 = *dpsrc++;
          vis_pst_32(vis_faligndata(data0, data1), dpdst++, phase1_mask);
          data0 = data1;
          tail_mask = phase2_mask;
        }
      }

      /* Trailing word: restricted to the lanes inside the row. */
      data1 = *dpsrc;
      emask = vis_edge32(dpdst, pdst_row_end) & tail_mask;
      vis_pst_32(vis_faligndata(data0, data1), dpdst, emask);
    }
  }
}
905 
906 /***************************************************************/
/*
 * mlib_ImageConvCopyEdge_S32_4 - copy the top and bottom edge rows of a
 * 4-channel S32 image from src to dst.
 *
 *   dst, src    destination / source images
 *   dx_l, dx_r  not referenced directly in this body; they are visible to
 *               the VERT_EDGES macro (defined elsewhere in this file)
 *   dy_t, dy_b  number of rows processed at the top / bottom
 *   cmask       channel mask (only the low four bits are used here)
 *
 * VERT_EDGES supplies the working locals used below (pdst, psrc, strides,
 * img_width_t, img_width_b, i, j, emask, dpdst, dpsrc, data0, data1, ...).
 * NOTE(review): HORIZ_EDGES looks like the fallback for images narrower
 * than 16 pixels and is presumably expected to leave the function - confirm
 * against its definition.
 *
 * An 8-byte word holds two S32 elements, so with four channels the lane
 * mask repeats every two words. Two shifted copies of the replicated
 * channel pattern are kept, one per position in that cycle.
 */
void mlib_ImageConvCopyEdge_S32_4(mlib_image       *dst,
                                  const mlib_image *src,
                                  mlib_s32         dx_l,
                                  mlib_s32         dx_r,
                                  mlib_s32         dy_t,
                                  mlib_s32         dy_b,
                                  mlib_s32         cmask)
{
  mlib_s32 chan_bits = cmask & 15;
  mlib_s32 even_mask, odd_mask, tail_mask, align_off;
  mlib_s32 edge_pass, edge_rows, edge_row, edge_len;

  VERT_EDGES(4, mlib_s32, cmask);

  if (img_width < 16)
    HORIZ_EDGES(4, mlib_s32, cmask);

  /* Repeat the 4-bit channel pattern four times. */
  chan_bits |= (chan_bits << 4);
  chan_bits |= (chan_bits << 8);

  /* Pass 0 walks the top rows, pass 1 the bottom rows (counted from the
   * last row upwards). */
  for (edge_pass = 0; edge_pass < 2; edge_pass++) {
    if (edge_pass == 0) {
      edge_rows = dy_t;
      edge_len = img_width_t * 4;
    }
    else {
      edge_rows = dy_b;
      edge_len = img_width_b * 4;
    }

    for (i = 0; i < edge_rows; i++) {
      edge_row = (edge_pass == 0) ? i : (img_height - 1 - i);
      pdst_row = pdst + edge_row * dst_stride;
      psrc_row = psrc + edge_row * src_stride;
      pdst_row_end = pdst_row + edge_len - 1;

      /* Round the destination down to an 8-byte boundary and shift the
       * source pointer by the same number of bytes. */
      dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
      align_off = pdst_row - (mlib_s32 *) dpdst;
      dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(align_off << 2));

      /* Masks for the two word positions of the repeating cycle. */
      odd_mask = (chan_bits >> (align_off + 2));
      even_mask = odd_mask >> 2;

      /* Leading word: restricted to the lanes inside the row. */
      data0 = *dpsrc++;
      data1 = *dpsrc++;
      emask = vis_edge32(pdst_row, pdst_row_end) & odd_mask;
      vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
      data0 = data1;
      j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);

      /* Interior: two words (four S32 elements) per iteration. */
      while (j < (edge_len - 4)) {
        data1 = *dpsrc++;
        vis_pst_32(vis_faligndata(data0, data1), dpdst, even_mask);
        data0 = data1;
        data1 = *dpsrc++;
        vis_pst_32(vis_faligndata(data0, data1), dpdst + 1, odd_mask);
        data0 = data1;
        dpdst += 2;
        j += 4;
      }

      /* At most one more full word; tail_mask tracks which cycle position
       * the final (edge-limited) word falls on. */
      tail_mask = even_mask;

      if (j < (edge_len - 2)) {
        data1 = *dpsrc++;
        vis_pst_32(vis_faligndata(data0, data1), dpdst++, even_mask);
        data0 = data1;
        tail_mask = odd_mask;
      }

      /* Trailing word: restricted to the lanes inside the row. */
      data1 = *dpsrc;
      emask = vis_edge32(dpdst, pdst_row_end) & tail_mask;
      vis_pst_32(vis_faligndata(data0, data1), dpdst, emask);
    }
  }
}
994 
995 /***************************************************************/
996