1 /*
2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 
27 
28 #include "vis_proto.h"
29 #include "mlib_image.h"
30 #include "mlib_v_ImageLookUpFunc.h"
31 
32 /***************************************************************/
33 static void mlib_v_ImageLookUpSI_U16_U16_2_DstA8D1(const mlib_u16 *src,
34                                                    mlib_u16       *dst,
35                                                    mlib_s32       xsize,
36                                                    const mlib_u16 **table);
37 
38 static void mlib_v_ImageLookUpSI_U16_U16_2_D1(const mlib_u16 *src,
39                                               mlib_u16       *dst,
40                                               mlib_s32       xsize,
41                                               const mlib_u16 **table);
42 
43 static void mlib_v_ImageLookUpSI_U16_U16_3_D1(const mlib_u16 *src,
44                                               mlib_u16       *dst,
45                                               mlib_s32       xsize,
46                                               const mlib_u16 **table);
47 
48 static void mlib_v_ImageLookUpSI_U16_U16_4_DstOff0_D1(const mlib_u16 *src,
49                                                       mlib_u16       *dst,
50                                                       mlib_s32       xsize,
51                                                       const mlib_u16 **table);
52 
53 static void mlib_v_ImageLookUpSI_U16_U16_4_DstOff1_D1(const mlib_u16 *src,
54                                                       mlib_u16       *dst,
55                                                       mlib_s32       xsize,
56                                                       const mlib_u16 **table);
57 
58 static void mlib_v_ImageLookUpSI_U16_U16_4_DstOff2_D1(const mlib_u16 *src,
59                                                       mlib_u16       *dst,
60                                                       mlib_s32       xsize,
61                                                       const mlib_u16 **table);
62 
63 static void mlib_v_ImageLookUpSI_U16_U16_4_DstOff3_D1(const mlib_u16 *src,
64                                                       mlib_u16       *dst,
65                                                       mlib_s32       xsize,
66                                                       const mlib_u16 **table);
67 
68 /***************************************************************/
/*
 * Thin wrapper around vis_ld_u16_i() that casts the base pointer X to
 * (void *), so const table pointers can be passed without a warning.
 * Every caller in this file passes a table index shifted left by one as Y
 * (presumably a byte offset into a table of 16-bit entries - confirm
 * against vis_proto.h).
 */
#define VIS_LD_U16_I(X, Y)      vis_ld_u16_i((void *)(X), (Y))
70 
71 /***************************************************************/
/*
 * Two-channel lookup of one row, destination written in 8-byte words.
 *
 * For every source sample s the pair table[0][s], table[1][s] is stored
 * to consecutive destination elements.
 *
 *   src    xsize source samples
 *   dst    receives 2 * xsize values; it is cast to (mlib_d64 *) and
 *          stored through directly, so it has to be 8-byte aligned (the
 *          row driver in this file only calls this routine in that case)
 *   xsize  number of source samples in the row
 *   table  table[0] and table[1] are the per-channel lookup tables
 *
 * Two samples are consumed per 8-byte store.  The loop is software
 * pipelined: the indices of the next pair are loaded at the bottom of an
 * iteration and the last complete pair is packed after the loop, so no
 * sample beyond src[xsize - 1] is read.  An odd trailing sample is packed
 * separately and stored as 4 bytes.
 *
 * Packing: after vis_alignaddr((void *)0, 6) each vis_faligndata(t, acc)
 * is used to push one 16-bit value in front of acc; values are pushed in
 * reverse order (t3 first, t0 last) so that the finished word holds
 * t0, t1, t2, t3 in destination order (relies on the VIS
 * alignaddr/faligndata semantics - confirm against the VIS manual).
 */
void mlib_v_ImageLookUpSI_U16_U16_2_DstA8D1(const mlib_u16 *src,
                                            mlib_u16       *dst,
                                            mlib_s32       xsize,
                                            const mlib_u16 **table)
{
  mlib_u16 *sp;                        /* pointer to source data */
  mlib_s32 s0, s1;                     /* source data */
  mlib_u16 *dl;                        /* pointer to start of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2;                 /* destination data */
  mlib_d64 t3, acc;                    /* destination data */
  mlib_s32 i;                          /* loop variable */
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];

  /*
   * NOTE(review): acc is never initialised; the first vis_faligndata reads
   * an indeterminate value which is expected to be shifted out completely
   * by the pushes that precede each store.
   */

  sp = (void *)src;
  dl = dst;
  dp = (mlib_d64 *) dl;

  vis_alignaddr((void *)0, 6);

  if (xsize >= 2) {

    /* indices are doubled before being handed to VIS_LD_U16_I */
    s0 = (sp[0] << 1);
    s1 = (sp[1] << 1);
    sp += 2;

#pragma pipeloop(0)
    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
      t3 = VIS_LD_U16_I(tab1, s1);
      t2 = VIS_LD_U16_I(tab0, s1);
      t1 = VIS_LD_U16_I(tab1, s0);
      t0 = VIS_LD_U16_I(tab0, s0);
      acc = vis_faligndata(t3, acc);
      acc = vis_faligndata(t2, acc);
      acc = vis_faligndata(t1, acc);
      acc = vis_faligndata(t0, acc);
      /* prefetch the indices of the next pair */
      s0 = (sp[0] << 1);
      s1 = (sp[1] << 1);
      *dp++ = acc;
    }

    /* last complete pair: same packing, but nothing more is read */
    t3 = VIS_LD_U16_I(tab1, s1);
    t2 = VIS_LD_U16_I(tab0, s1);
    t1 = VIS_LD_U16_I(tab1, s0);
    t0 = VIS_LD_U16_I(tab0, s0);
    acc = vis_faligndata(t3, acc);
    acc = vis_faligndata(t2, acc);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    *dp++ = acc;
  }

  if ((xsize & 1) != 0) {
    /* odd trailing sample: only two values are pushed, upper half stored */
    s0 = (sp[0] << 1);
    t1 = VIS_LD_U16_I(tab1, s0);
    t0 = VIS_LD_U16_I(tab0, s0);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    *(mlib_f32 *) dp = vis_read_hi(acc);
  }
}
134 
135 /***************************************************************/
/*
 * Two-channel lookup of one row for a destination that is one 16-bit
 * element short of an 8-byte boundary.
 *
 *   src    xsize source samples; at least one is required because
 *          src[0] is read unconditionally
 *   dst    receives 2 * xsize values.  The first value is written as a
 *          scalar and the pointer that follows it is cast to
 *          (mlib_d64 *), so ((mlib_addr)dst & 7) is expected to be 6.
 *          The row driver in this file reaches this routine only in that
 *          state, assuming 2-byte aligned rows.
 *   xsize  number of source samples in the row (>= 1)
 *   table  table[0] and table[1] are the per-channel lookup tables
 *
 * After the leading scalar store of table[0][src[0]] every 8-byte word
 * straddles pixel boundaries and holds
 *     tab1[s0], tab0[s1], tab1[s1], tab0[s2]
 * where s0 is the sample whose first channel is already written.  The
 * second channel of the final sample is written as a scalar at the end.
 *
 * Packing uses vis_alignaddr((void *)0, 6) followed by vis_faligndata
 * pushes in reverse order (t3 first, t0 last); this relies on the VIS
 * alignaddr/faligndata semantics - confirm against the VIS manual.
 */
void mlib_v_ImageLookUpSI_U16_U16_2_D1(const mlib_u16 *src,
                                       mlib_u16       *dst,
                                       mlib_s32       xsize,
                                       const mlib_u16 **table)
{
  mlib_u16 *sp;                        /* pointer to source data */
  mlib_s32 s0, s1, s2;                 /* source data */
  mlib_u16 *dl;                        /* pointer to start of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2;                 /* destination data */
  mlib_d64 t3, acc;                    /* destination data */
  mlib_s32 i;                          /* loop variable */
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];

  /*
   * NOTE(review): acc is never initialised; the first vis_faligndata reads
   * an indeterminate value which is expected to be shifted out completely
   * by the pushes that precede each store.
   */

  sp = (void *)src;
  dl = dst;

  vis_alignaddr((void *)0, 6);

  /* first channel of the first sample goes out as a scalar */
  s0 = *sp++;
  *dl++ = tab0[s0];
  dp = (mlib_d64 *) dl;
  xsize--;
  s0 <<= 1;                            /* doubled for VIS_LD_U16_I */

  if (xsize >= 2) {

    s1 = (sp[0] << 1);
    s2 = (sp[1] << 1);
    sp += 2;

#pragma pipeloop(0)
    for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
      t3 = VIS_LD_U16_I(tab0, s2);
      t2 = VIS_LD_U16_I(tab1, s1);
      t1 = VIS_LD_U16_I(tab0, s1);
      t0 = VIS_LD_U16_I(tab1, s0);
      acc = vis_faligndata(t3, acc);
      acc = vis_faligndata(t2, acc);
      acc = vis_faligndata(t1, acc);
      acc = vis_faligndata(t0, acc);
      /* s2 still owes its second channel; carry it over as s0 */
      s0 = s2;
      s1 = (sp[0] << 1);
      s2 = (sp[1] << 1);
      *dp++ = acc;
    }

    /* last complete pair: same packing, but nothing more is read */
    t3 = VIS_LD_U16_I(tab0, s2);
    t2 = VIS_LD_U16_I(tab1, s1);
    t1 = VIS_LD_U16_I(tab0, s1);
    t0 = VIS_LD_U16_I(tab1, s0);
    acc = vis_faligndata(t3, acc);
    acc = vis_faligndata(t2, acc);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    s0 = s2;
    *dp++ = acc;
  }

  dl = (mlib_u16 *) dp;

  if ((xsize & 1) != 0) {
    /* one sample left: two values are pushed, upper half stored */
    s1 = (sp[0] << 1);
    t1 = VIS_LD_U16_I(tab0, s1);
    t0 = VIS_LD_U16_I(tab1, s0);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    *(mlib_f32 *) dp = vis_read_hi(acc);
    s0 = s1;
    dl += 2;
  }

  /* undo the doubling and write the pending second channel as a scalar */
  s0 >>= 1;
  *dl = tab1[s0];
}
212 
213 /***************************************************************/
/*
 * Two-channel U16 -> U16 single-input lookup over a whole image.
 *
 *   src    first source row
 *   slb    distance between source rows in bytes (the row pointer is
 *          advanced through an mlib_u8 cast)
 *   dst    first destination row; each row receives 2 * xsize values
 *   dlb    distance between destination rows in bytes
 *   xsize  source samples per row
 *   ysize  number of rows
 *   table  table[0] and table[1] are the per-channel lookup tables
 *
 * Per row: when the destination is four or more bytes away from the next
 * 8-byte boundary, one whole pixel (two values, 4 bytes) is written with
 * scalar stores.  The rest of the row goes to the aligned kernel if the
 * destination is now 8-byte aligned, otherwise to the kernel that starts
 * with a single scalar store.
 */
void mlib_v_ImageLookUpSI_U16_U16_2(const mlib_u16 *src,
                                    mlib_s32       slb,
                                    mlib_u16       *dst,
                                    mlib_s32       dlb,
                                    mlib_s32       xsize,
                                    mlib_s32       ysize,
                                    const mlib_u16 **table)
{
  mlib_u16 *sl;
  mlib_u16 *dl;
  mlib_s32 j;
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];

  sl = (void *)src;
  dl = dst;

  /* row loop */
  for (j = 0; j < ysize; j++) {
    mlib_u16 *sp = sl;
    mlib_u16 *dp = dl;
    mlib_s32 off, s0, size = xsize;

    /* bytes from dp up to the next 8-byte boundary (0 when aligned) */
    off = ((8 - ((mlib_addr) dp & 7)) & 7);

    if ((off >= 4) && (size > 0)) {
      /* a whole pixel fits before the boundary: write it with scalars */
      s0 = *sp++;
      *dp++ = tab0[s0];
      *dp++ = tab1[s0];
      size--;
    }

    if (size > 0) {

      if (((mlib_addr) dp & 7) == 0) {
        mlib_v_ImageLookUpSI_U16_U16_2_DstA8D1(sp, dp, size, table);
      }
      else {
        mlib_v_ImageLookUpSI_U16_U16_2_D1(sp, dp, size, table);
      }
    }

    sl = (mlib_u16 *) ((mlib_u8 *) sl + slb);
    dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
  }
}
260 
261 /***************************************************************/
/*
 * Three-channel lookup of one row, destination written in 8-byte words.
 *
 * For every source sample s the triple table[0][s], table[1][s],
 * table[2][s] is stored to consecutive destination elements.
 *
 *   src    xsize source samples
 *   dst    receives 3 * xsize values; it is cast to (mlib_d64 *), so it
 *          has to be 8-byte aligned whenever xsize >= 4 (the row driver
 *          in this file aligns it with scalar stores first)
 *   xsize  number of source samples in the row
 *   table  table[0..2] are the per-channel lookup tables
 *
 * Four samples (12 values) are packed into three 8-byte words per
 * iteration.  The loop is software pipelined: the next four indices are
 * loaded at the bottom of an iteration and the last full group is packed
 * after the loop, so nothing beyond src[xsize - 1] is read.  The
 * remaining xsize % 4 samples are handled by the scalar loop at the end.
 *
 * Packing uses vis_alignaddr((void *)0, 6) followed by vis_faligndata
 * pushes in reverse order (t3 first, t0 last); this relies on the VIS
 * alignaddr/faligndata semantics - confirm against the VIS manual.
 */
void mlib_v_ImageLookUpSI_U16_U16_3_D1(const mlib_u16 *src,
                                       mlib_u16       *dst,
                                       mlib_s32       xsize,
                                       const mlib_u16 **table)
{
  mlib_u16 *sp;                        /* pointer to source data */
  mlib_u16 *dl;                        /* pointer to start of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2, t3;             /* destination data */
  mlib_d64 acc0, acc1, acc2;           /* destination data */
  mlib_s32 i;                          /* loop variable */
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];
  const mlib_u16 *tab2 = &table[2][0];
  mlib_s32 s00, s01, s02, s03;

  /*
   * NOTE(review): acc0..acc2 are never initialised; the first
   * vis_faligndata on each reads an indeterminate value which is expected
   * to be shifted out completely by the four pushes preceding each store.
   */

  sp = (void *)src;
  dl = dst;
  dp = (mlib_d64 *) dl;

  vis_alignaddr((void *)0, 6);

  /* the scalar tail loop starts here when the vector part is skipped */
  i = 0;

  if (xsize >= 4) {

    /* indices are doubled before being handed to VIS_LD_U16_I */
    s00 = (sp[0] << 1);
    s01 = (sp[1] << 1);
    s02 = (sp[2] << 1);
    s03 = (sp[3] << 1);
    sp += 4;

#pragma pipeloop(0)
    for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
      /* word 0: all of sample 0, first channel of sample 1 */
      t3 = VIS_LD_U16_I(tab0, s01);
      t2 = VIS_LD_U16_I(tab2, s00);
      t1 = VIS_LD_U16_I(tab1, s00);
      t0 = VIS_LD_U16_I(tab0, s00);
      acc0 = vis_faligndata(t3, acc0);
      acc0 = vis_faligndata(t2, acc0);
      acc0 = vis_faligndata(t1, acc0);
      acc0 = vis_faligndata(t0, acc0);
      /* word 1: rest of sample 1, first two channels of sample 2 */
      t3 = VIS_LD_U16_I(tab1, s02);
      t2 = VIS_LD_U16_I(tab0, s02);
      t1 = VIS_LD_U16_I(tab2, s01);
      t0 = VIS_LD_U16_I(tab1, s01);
      acc1 = vis_faligndata(t3, acc1);
      acc1 = vis_faligndata(t2, acc1);
      acc1 = vis_faligndata(t1, acc1);
      acc1 = vis_faligndata(t0, acc1);
      /* word 2: last channel of sample 2, all of sample 3 */
      t3 = VIS_LD_U16_I(tab2, s03);
      t2 = VIS_LD_U16_I(tab1, s03);
      t1 = VIS_LD_U16_I(tab0, s03);
      t0 = VIS_LD_U16_I(tab2, s02);
      acc2 = vis_faligndata(t3, acc2);
      acc2 = vis_faligndata(t2, acc2);
      acc2 = vis_faligndata(t1, acc2);
      acc2 = vis_faligndata(t0, acc2);
      /* prefetch the indices of the next group */
      s00 = (sp[0] << 1);
      s01 = (sp[1] << 1);
      s02 = (sp[2] << 1);
      s03 = (sp[3] << 1);
      *dp++ = acc0;
      *dp++ = acc1;
      *dp++ = acc2;
    }

    /* last full group: same packing, but nothing more is read */
    t3 = VIS_LD_U16_I(tab0, s01);
    t2 = VIS_LD_U16_I(tab2, s00);
    t1 = VIS_LD_U16_I(tab1, s00);
    t0 = VIS_LD_U16_I(tab0, s00);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    t3 = VIS_LD_U16_I(tab1, s02);
    t2 = VIS_LD_U16_I(tab0, s02);
    t1 = VIS_LD_U16_I(tab2, s01);
    t0 = VIS_LD_U16_I(tab1, s01);
    acc1 = vis_faligndata(t3, acc1);
    acc1 = vis_faligndata(t2, acc1);
    acc1 = vis_faligndata(t1, acc1);
    acc1 = vis_faligndata(t0, acc1);
    t3 = VIS_LD_U16_I(tab2, s03);
    t2 = VIS_LD_U16_I(tab1, s03);
    t1 = VIS_LD_U16_I(tab0, s03);
    t0 = VIS_LD_U16_I(tab2, s02);
    acc2 = vis_faligndata(t3, acc2);
    acc2 = vis_faligndata(t2, acc2);
    acc2 = vis_faligndata(t1, acc2);
    acc2 = vis_faligndata(t0, acc2);
    *dp++ = acc0;
    *dp++ = acc1;
    *dp++ = acc2;
    i += 4;
  }

  dl = (mlib_u16 *) dp;

  /* scalar tail: the remaining samples, one pixel at a time */
#pragma pipeloop(0)
  for (; i < xsize; i++) {
    s00 = sp[0];
    dl[0] = tab0[s00];
    dl[1] = tab1[s00];
    dl[2] = tab2[s00];
    dl += 3;
    sp++;
  }
}
371 
372 /***************************************************************/
/*
 * Three-channel U16 -> U16 single-input lookup over a whole image.
 *
 *   src    first source row
 *   slb    distance between source rows in bytes (the row pointer is
 *          advanced through an mlib_u8 cast)
 *   dst    first destination row; each row receives 3 * xsize values
 *   dlb    distance between destination rows in bytes
 *   xsize  source samples per row
 *   ysize  number of rows
 *   table  table[0..2] are the per-channel lookup tables
 *
 * Per row: the leading pixels are written with scalar stores until the
 * destination is 8-byte aligned, then the rest of the row is handed to
 * the vector kernel.  A pixel is 6 bytes, so a destination that is
 * 2, 4 or 6 bytes past an 8-byte boundary needs 1, 2 or 3 leading
 * pixels, which is exactly ((mlib_addr)dp & 7) >> 1.
 */
void mlib_v_ImageLookUpSI_U16_U16_3(const mlib_u16 *src,
                                    mlib_s32       slb,
                                    mlib_u16       *dst,
                                    mlib_s32       dlb,
                                    mlib_s32       xsize,
                                    mlib_s32       ysize,
                                    const mlib_u16 **table)
{
  mlib_u16 *sl;
  mlib_u16 *dl;
  mlib_s32 i, j;
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];
  const mlib_u16 *tab2 = &table[2][0];

  sl = (void *)src;
  dl = dst;

  /* row loop */
  for (j = 0; j < ysize; j++) {
    mlib_u16 *sp = sl;
    mlib_u16 *dp = dl;
    mlib_s32 off, s0, size = xsize;

    /* leading pixels needed for alignment, limited to the row length */
    off = ((mlib_addr) dp & 7) >> 1;
    off = (off < size) ? off : size;

    for (i = 0; i < off; i++) {
      s0 = *sp++;
      *dp++ = tab0[s0];
      *dp++ = tab1[s0];
      *dp++ = tab2[s0];
      size--;
    }

    if (size > 0) {
      mlib_v_ImageLookUpSI_U16_U16_3_D1(sp, dp, size, table);
    }

    sl = (mlib_u16 *) ((mlib_u8 *) sl + slb);
    dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
  }
}
416 
417 /***************************************************************/
/*
 * Four-channel lookup of one row, destination starting on a pixel that
 * coincides with an 8-byte word.
 *
 * For every source sample s the word table[0][s], table[1][s],
 * table[2][s], table[3][s] is written with a single 8-byte store.
 *
 *   src    xsize source samples
 *   dst    receives 4 * xsize values; it is cast to (mlib_d64 *), so it
 *          has to be 8-byte aligned (the row driver in this file calls
 *          this routine only when its computed offset is 0)
 *   xsize  number of source samples in the row
 *   table  table[0..3] are the per-channel lookup tables
 *
 * The loop is software pipelined: the next index is loaded at the bottom
 * of an iteration and the last sample is packed after the loop, so
 * nothing beyond src[xsize - 1] is read.
 *
 * Packing uses vis_alignaddr((void *)0, 6) followed by vis_faligndata
 * pushes in reverse order (t3 first, t0 last); this relies on the VIS
 * alignaddr/faligndata semantics - confirm against the VIS manual.
 */
void mlib_v_ImageLookUpSI_U16_U16_4_DstOff0_D1(const mlib_u16 *src,
                                               mlib_u16       *dst,
                                               mlib_s32       xsize,
                                               const mlib_u16 **table)
{
  mlib_u16 *sp;                        /* pointer to source data */
  mlib_s32 s0;                         /* source data */
  mlib_u16 *dl;                        /* pointer to start of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2, t3;             /* destination data */
  mlib_d64 acc;                        /* destination data */
  mlib_s32 i;                          /* loop variable */
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];
  const mlib_u16 *tab2 = &table[2][0];
  const mlib_u16 *tab3 = &table[3][0];

  /*
   * NOTE(review): acc is never initialised; the first vis_faligndata reads
   * an indeterminate value which is expected to be shifted out completely
   * by the four pushes that precede each store.
   */

  sp = (void *)src;
  dl = dst;
  dp = (mlib_d64 *) dl;

  vis_alignaddr((void *)0, 6);

  if (xsize >= 1) {

    /* index is doubled before being handed to VIS_LD_U16_I */
    s0 = (*sp++) << 1;

#pragma pipeloop(0)
    for (i = 0; i <= xsize - 2; i++) {
      t3 = VIS_LD_U16_I(tab3, s0);
      t2 = VIS_LD_U16_I(tab2, s0);
      t1 = VIS_LD_U16_I(tab1, s0);
      t0 = VIS_LD_U16_I(tab0, s0);
      acc = vis_faligndata(t3, acc);
      acc = vis_faligndata(t2, acc);
      acc = vis_faligndata(t1, acc);
      acc = vis_faligndata(t0, acc);
      /* prefetch the next index */
      s0 = (*sp++) << 1;
      *dp++ = acc;
    }

    /* last sample: same packing, but nothing more is read */
    t3 = VIS_LD_U16_I(tab3, s0);
    t2 = VIS_LD_U16_I(tab2, s0);
    t1 = VIS_LD_U16_I(tab1, s0);
    t0 = VIS_LD_U16_I(tab0, s0);
    acc = vis_faligndata(t3, acc);
    acc = vis_faligndata(t2, acc);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    *dp++ = acc;
  }
}
470 
471 /***************************************************************/
/*
 * Four-channel lookup of one row where the first channel of the first
 * pixel has already been written by the caller.
 *
 *   src    first sample of the row; it is read again here because the
 *          row driver does not advance its source pointer after writing
 *          the first channel.  xsize + 1 samples are read in total.
 *   dst    points at the second channel of the first pixel; it is cast
 *          to (mlib_d64 *), so it has to be 8-byte aligned
 *   xsize  number of pixels in the row minus one (the row driver
 *          decrements its count before the call)
 *   table  table[0..3] are the per-channel lookup tables
 *
 * Every 8-byte word holds tab1[s0], tab2[s0], tab3[s0], tab0[s1]: the
 * three outstanding channels of the current sample followed by the first
 * channel of the next one.  The three outstanding channels of the final
 * sample are written with scalar stores at the end.
 *
 * Packing uses vis_alignaddr((void *)0, 6) followed by vis_faligndata
 * pushes in reverse order (t3 first, t0 last); this relies on the VIS
 * alignaddr/faligndata semantics - confirm against the VIS manual.
 */
void mlib_v_ImageLookUpSI_U16_U16_4_DstOff1_D1(const mlib_u16 *src,
                                               mlib_u16       *dst,
                                               mlib_s32       xsize,
                                               const mlib_u16 **table)
{
  mlib_u16 *sp;                        /* pointer to source data */
  mlib_s32 s0, s1;                     /* source data */
  mlib_u16 *dl;                        /* pointer to start of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2, t3;             /* destination data */
  mlib_d64 acc;                        /* destination data */
  mlib_s32 i;                          /* loop variable */
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];
  const mlib_u16 *tab2 = &table[2][0];
  const mlib_u16 *tab3 = &table[3][0];

  /*
   * NOTE(review): acc is never initialised; the first vis_faligndata reads
   * an indeterminate value which is expected to be shifted out completely
   * by the four pushes that precede each store.
   */

  sp = (void *)src;
  dl = dst;
  dp = (mlib_d64 *) dl;

  vis_alignaddr((void *)0, 6);

  /* sample whose first channel is already in place (index doubled) */
  s0 = (*sp++) << 1;

  if (xsize >= 1) {

    s1 = (*sp++) << 1;

#pragma pipeloop(0)
    for (i = 0; i <= xsize - 2; i++) {
      t3 = VIS_LD_U16_I(tab0, s1);
      t2 = VIS_LD_U16_I(tab3, s0);
      t1 = VIS_LD_U16_I(tab2, s0);
      t0 = VIS_LD_U16_I(tab1, s0);
      acc = vis_faligndata(t3, acc);
      acc = vis_faligndata(t2, acc);
      acc = vis_faligndata(t1, acc);
      acc = vis_faligndata(t0, acc);
      /* s1 still owes three channels; carry it over as s0 */
      s0 = s1;
      s1 = (*sp++) << 1;
      *dp++ = acc;
    }

    /* last word: same packing, but nothing more is read */
    t3 = VIS_LD_U16_I(tab0, s1);
    t2 = VIS_LD_U16_I(tab3, s0);
    t1 = VIS_LD_U16_I(tab2, s0);
    t0 = VIS_LD_U16_I(tab1, s0);
    acc = vis_faligndata(t3, acc);
    acc = vis_faligndata(t2, acc);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    s0 = s1;
    *dp++ = acc;
  }

  /* undo the doubling and finish the last pixel with scalar stores */
  dl = (mlib_u16 *) dp;
  s0 >>= 1;

  dl[0] = tab1[s0];
  dl[1] = tab2[s0];
  dl[2] = tab3[s0];
}
535 
536 /***************************************************************/
/*
 * Four-channel lookup of one row where the first two channels of the
 * first pixel have already been written by the caller.
 *
 *   src    first sample of the row; it is read again here because the
 *          row driver does not advance its source pointer after writing
 *          the leading channels.  xsize + 1 samples are read in total.
 *   dst    points at the third channel of the first pixel; it is cast
 *          to (mlib_d64 *), so it has to be 8-byte aligned
 *   xsize  number of pixels in the row minus one (the row driver
 *          decrements its count before the call)
 *   table  table[0..3] are the per-channel lookup tables
 *
 * Every 8-byte word holds tab2[s0], tab3[s0], tab0[s1], tab1[s1]: the
 * two outstanding channels of the current sample followed by the first
 * two channels of the next one.  The two outstanding channels of the
 * final sample are written with scalar stores at the end.
 *
 * Packing uses vis_alignaddr((void *)0, 6) followed by vis_faligndata
 * pushes in reverse order (t3 first, t0 last); this relies on the VIS
 * alignaddr/faligndata semantics - confirm against the VIS manual.
 */
void mlib_v_ImageLookUpSI_U16_U16_4_DstOff2_D1(const mlib_u16 *src,
                                               mlib_u16       *dst,
                                               mlib_s32       xsize,
                                               const mlib_u16 **table)
{
  mlib_u16 *sp;                        /* pointer to source data */
  mlib_s32 s0, s1;                     /* source data */
  mlib_u16 *dl;                        /* pointer to start of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2, t3;             /* destination data */
  mlib_d64 acc;                        /* destination data */
  mlib_s32 i;                          /* loop variable */
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];
  const mlib_u16 *tab2 = &table[2][0];
  const mlib_u16 *tab3 = &table[3][0];

  /*
   * NOTE(review): acc is never initialised; the first vis_faligndata reads
   * an indeterminate value which is expected to be shifted out completely
   * by the four pushes that precede each store.
   */

  sp = (void *)src;
  dl = dst;
  dp = (mlib_d64 *) dl;

  vis_alignaddr((void *)0, 6);

  /* sample whose first two channels are already in place (index doubled) */
  s0 = (*sp++) << 1;

  if (xsize >= 1) {

    s1 = (*sp++) << 1;

#pragma pipeloop(0)
    for (i = 0; i <= xsize - 2; i++) {
      t3 = VIS_LD_U16_I(tab1, s1);
      t2 = VIS_LD_U16_I(tab0, s1);
      t1 = VIS_LD_U16_I(tab3, s0);
      t0 = VIS_LD_U16_I(tab2, s0);
      acc = vis_faligndata(t3, acc);
      acc = vis_faligndata(t2, acc);
      acc = vis_faligndata(t1, acc);
      acc = vis_faligndata(t0, acc);
      /* s1 still owes two channels; carry it over as s0 */
      s0 = s1;
      s1 = (*sp++) << 1;
      *dp++ = acc;
    }

    /* last word: same packing, but nothing more is read */
    t3 = VIS_LD_U16_I(tab1, s1);
    t2 = VIS_LD_U16_I(tab0, s1);
    t1 = VIS_LD_U16_I(tab3, s0);
    t0 = VIS_LD_U16_I(tab2, s0);
    acc = vis_faligndata(t3, acc);
    acc = vis_faligndata(t2, acc);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    s0 = s1;
    *dp++ = acc;
  }

  /* undo the doubling and finish the last pixel with scalar stores */
  dl = (mlib_u16 *) dp;
  s0 >>= 1;

  dl[0] = tab2[s0];
  dl[1] = tab3[s0];
}
599 
600 /***************************************************************/
/*
 * Four-channel lookup of one row where the first three channels of the
 * first pixel have already been written by the caller.
 *
 *   src    first sample of the row; it is read again here because the
 *          row driver does not advance its source pointer after writing
 *          the leading channels.  xsize + 1 samples are read in total.
 *   dst    points at the fourth channel of the first pixel; it is cast
 *          to (mlib_d64 *), so it has to be 8-byte aligned
 *   xsize  number of pixels in the row minus one (the row driver
 *          decrements its count before the call)
 *   table  table[0..3] are the per-channel lookup tables
 *
 * Every 8-byte word holds tab3[s0], tab0[s1], tab1[s1], tab2[s1]: the
 * outstanding channel of the current sample followed by the first three
 * channels of the next one.  The outstanding channel of the final sample
 * is written with a scalar store at the end.
 *
 * Packing uses vis_alignaddr((void *)0, 6) followed by vis_faligndata
 * pushes in reverse order (t3 first, t0 last); this relies on the VIS
 * alignaddr/faligndata semantics - confirm against the VIS manual.
 */
void mlib_v_ImageLookUpSI_U16_U16_4_DstOff3_D1(const mlib_u16 *src,
                                               mlib_u16       *dst,
                                               mlib_s32       xsize,
                                               const mlib_u16 **table)
{
  mlib_u16 *sp;                        /* pointer to source data */
  mlib_s32 s0, s1;                     /* source data */
  mlib_u16 *dl;                        /* pointer to start of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2, t3;             /* destination data */
  mlib_d64 acc;                        /* destination data */
  mlib_s32 i;                          /* loop variable */
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];
  const mlib_u16 *tab2 = &table[2][0];
  const mlib_u16 *tab3 = &table[3][0];

  /*
   * NOTE(review): acc is never initialised; the first vis_faligndata reads
   * an indeterminate value which is expected to be shifted out completely
   * by the four pushes that precede each store.
   */

  sp = (void *)src;
  dl = dst;
  dp = (mlib_d64 *) dl;

  vis_alignaddr((void *)0, 6);

  /* sample whose first three channels are already in place (doubled) */
  s0 = (*sp++) << 1;

  if (xsize >= 1) {

    s1 = (*sp++) << 1;

#pragma pipeloop(0)
    for (i = 0; i <= xsize - 2; i++) {
      t3 = VIS_LD_U16_I(tab2, s1);
      t2 = VIS_LD_U16_I(tab1, s1);
      t1 = VIS_LD_U16_I(tab0, s1);
      t0 = VIS_LD_U16_I(tab3, s0);
      acc = vis_faligndata(t3, acc);
      acc = vis_faligndata(t2, acc);
      acc = vis_faligndata(t1, acc);
      acc = vis_faligndata(t0, acc);
      /* s1 still owes its last channel; carry it over as s0 */
      s0 = s1;
      s1 = (*sp++) << 1;
      *dp++ = acc;
    }

    /* last word: same packing, but nothing more is read */
    t3 = VIS_LD_U16_I(tab2, s1);
    t2 = VIS_LD_U16_I(tab1, s1);
    t1 = VIS_LD_U16_I(tab0, s1);
    t0 = VIS_LD_U16_I(tab3, s0);
    acc = vis_faligndata(t3, acc);
    acc = vis_faligndata(t2, acc);
    acc = vis_faligndata(t1, acc);
    acc = vis_faligndata(t0, acc);
    s0 = s1;
    *dp++ = acc;
  }

  /* undo the doubling and finish the last pixel with a scalar store */
  dl = (mlib_u16 *) dp;
  s0 >>= 1;

  dl[0] = tab3[s0];
}
662 
663 /***************************************************************/
/*
 * Four-channel U16 -> U16 single-input lookup over a whole image.
 *
 *   src    first source row
 *   slb    distance between source rows in bytes (the row pointer is
 *          advanced through an mlib_u8 cast)
 *   dst    first destination row; each row receives 4 * xsize values
 *   dlb    distance between destination rows in bytes
 *   xsize  source samples per row
 *   ysize  number of rows
 *   table  table[0..3] are the per-channel lookup tables
 *
 * Per row: off is the number of 16-bit elements between the destination
 * and the next 8-byte boundary (0..3).  That many leading channels of the
 * first pixel are written with scalar stores, after which the matching
 * DstOff kernel takes over.  For off != 0 the source pointer is NOT
 * advanced (the kernel reads the first sample again to emit its remaining
 * channels) and the kernel receives the pixel count minus one.
 */
void mlib_v_ImageLookUpSI_U16_U16_4(const mlib_u16 *src,
                                    mlib_s32       slb,
                                    mlib_u16       *dst,
                                    mlib_s32       dlb,
                                    mlib_s32       xsize,
                                    mlib_s32       ysize,
                                    const mlib_u16 **table)
{
  mlib_u16 *sl;
  mlib_u16 *dl;
  mlib_s32 j;
  const mlib_u16 *tab0 = &table[0][0];
  const mlib_u16 *tab1 = &table[1][0];
  const mlib_u16 *tab2 = &table[2][0];

  sl = (void *)src;
  dl = dst;

  /* row loop */
  for (j = 0; j < ysize; j++) {
    mlib_u16 *sp = sl;
    mlib_u16 *dp = dl;
    mlib_s32 off, s0, size = xsize;

    if (size > 0) {
      /* 16-bit elements up to the next 8-byte boundary */
      off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;

      if (off == 0) {
        mlib_v_ImageLookUpSI_U16_U16_4_DstOff0_D1(sp, dp, size, table);
      }
      else if (off == 1) {
        s0 = *sp;
        *dp++ = tab0[s0];
        size--;
        mlib_v_ImageLookUpSI_U16_U16_4_DstOff1_D1(sp, dp, size, table);
      }
      else if (off == 2) {
        s0 = *sp;
        *dp++ = tab0[s0];
        *dp++ = tab1[s0];
        size--;
        mlib_v_ImageLookUpSI_U16_U16_4_DstOff2_D1(sp, dp, size, table);
      }
      else if (off == 3) {
        s0 = *sp;
        *dp++ = tab0[s0];
        *dp++ = tab1[s0];
        *dp++ = tab2[s0];
        size--;
        mlib_v_ImageLookUpSI_U16_U16_4_DstOff3_D1(sp, dp, size, table);
      }
    }

    sl = (mlib_u16 *) ((mlib_u8 *) sl + slb);
    dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
  }
}
721 
722 /***************************************************************/
723