1 /*
2  * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 
27 
28 #include "vis_proto.h"
29 #include "mlib_image.h"
30 #include "mlib_v_ImageLookUpFunc.h"
31 
32 /***************************************************************/
33 static void mlib_v_ImageLookUpSI_S16_S16_2_DstA8D1(const mlib_s16 *src,
34                                                    mlib_s16       *dst,
35                                                    mlib_s32       xsize,
36                                                    const mlib_s16 **table);
37 
38 static void mlib_v_ImageLookUpSI_S16_S16_2_D1(const mlib_s16 *src,
39                                               mlib_s16       *dst,
40                                               mlib_s32       xsize,
41                                               const mlib_s16 **table);
42 
43 static void mlib_v_ImageLookUpSI_S16_S16_3_D1(const mlib_s16 *src,
44                                               mlib_s16       *dst,
45                                               mlib_s32       xsize,
46                                               const mlib_s16 **table);
47 
48 static void mlib_v_ImageLookUpSI_S16_S16_4_DstOff0_D1(const mlib_s16 *src,
49                                                       mlib_s16       *dst,
50                                                       mlib_s32       xsize,
51                                                       const mlib_s16 **table);
52 
53 static void mlib_v_ImageLookUpSI_S16_S16_4_DstOff1_D1(const mlib_s16 *src,
54                                                       mlib_s16       *dst,
55                                                       mlib_s32       xsize,
56                                                       const mlib_s16 **table);
57 
58 static void mlib_v_ImageLookUpSI_S16_S16_4_DstOff2_D1(const mlib_s16 *src,
59                                                       mlib_s16       *dst,
60                                                       mlib_s32       xsize,
61                                                       const mlib_s16 **table);
62 
63 static void mlib_v_ImageLookUpSI_S16_S16_4_DstOff3_D1(const mlib_s16 *src,
64                                                       mlib_s16       *dst,
65                                                       mlib_s32       xsize,
66                                                       const mlib_s16 **table);
67 
68 /***************************************************************/
69 #define VIS_LD_U16_I(X, Y)      vis_ld_u16_i((void *)(X), (Y))
70 
71 /***************************************************************/
/*
 * Two-channel S16 -> S16 lookup for one row. The destination is written
 * through an mlib_d64 pointer, so dst is expected to be 8-byte aligned.
 *
 *   src   - source samples
 *   dst   - destination row
 *   xsize - number of source samples to process
 *   table - table[0] and table[1] are the per-channel lookup tables
 *
 * Every source sample yields two results, one from each table. Two samples
 * are packed into one 8-byte store; an odd trailing sample is written with
 * a single 4-byte store.
 */
void mlib_v_ImageLookUpSI_S16_S16_2_DstA8D1(const mlib_s16 *src,
                                            mlib_s16       *dst,
                                            mlib_s32       xsize,
                                            const mlib_s16 **table)
{
  /* Tables are addressed from element 32768, presumably so that a signed
     16-bit sample can be used directly as an index. */
  const mlib_s16 *lut0 = &table[0][32768];
  const mlib_s16 *lut1 = &table[1][32768];
  const mlib_s16 *in   = src;               /* read cursor */
  mlib_d64       *out  = (mlib_d64 *) dst;  /* 8-byte write cursor */
  mlib_d64       e0, e1, e2, e3;            /* looked-up entries */
  /* NOTE(review): pack is fed to vis_faligndata before it is ever assigned;
     presumably the shifts push the stale bytes out - confirm against the
     VIS definition of faligndata. */
  mlib_d64       pack;
  mlib_s32       ofs0, ofs1;                /* sample values doubled, used as load offsets */
  mlib_s32       k;

  vis_alignaddr((void *) 0, 6);

  if (xsize >= 2) {

    /* Prime the pipeline with the first pair of samples. */
    ofs0 = (in[0] << 1);
    ofs1 = (in[1] << 1);
    in += 2;

#pragma pipeloop(0)
    for (k = 0; k <= xsize - 4; k += 2) {
      e3 = VIS_LD_U16_I(lut1, ofs1);
      e2 = VIS_LD_U16_I(lut0, ofs1);
      e1 = VIS_LD_U16_I(lut1, ofs0);
      e0 = VIS_LD_U16_I(lut0, ofs0);
      pack = vis_faligndata(e3, pack);
      pack = vis_faligndata(e2, pack);
      pack = vis_faligndata(e1, pack);
      pack = vis_faligndata(e0, pack);
      /* Fetch the next pair before storing the current result. */
      ofs0 = (in[0] << 1);
      ofs1 = (in[1] << 1);
      *out++ = pack;
      in += 2;
    }

    /* Drain: the last pair fetched has not been stored yet. */
    e3 = VIS_LD_U16_I(lut1, ofs1);
    e2 = VIS_LD_U16_I(lut0, ofs1);
    e1 = VIS_LD_U16_I(lut1, ofs0);
    e0 = VIS_LD_U16_I(lut0, ofs0);
    pack = vis_faligndata(e3, pack);
    pack = vis_faligndata(e2, pack);
    pack = vis_faligndata(e1, pack);
    pack = vis_faligndata(e0, pack);
    *out++ = pack;
  }

  if (xsize & 1) {
    /* One sample left over: only the upper half of pack is written. */
    ofs0 = (in[0] << 1);
    e1 = VIS_LD_U16_I(lut1, ofs0);
    e0 = VIS_LD_U16_I(lut0, ofs0);
    pack = vis_faligndata(e1, pack);
    pack = vis_faligndata(e0, pack);
    *(mlib_f32 *) out = vis_read_hi(pack);
  }
}
134 
135 /***************************************************************/
/*
 * Two-channel S16 -> S16 lookup for one row, used by the caller in this
 * file when dst is not 8-byte aligned.
 *
 *   src   - source samples
 *   dst   - destination row
 *   xsize - number of source samples to process (the first sample is read
 *           unconditionally)
 *   table - table[0] and table[1] are the per-channel lookup tables
 *
 * The channel-0 result of the first sample is stored on its own; the
 * pointer that follows is then used for 8-byte stores. From there on each
 * 8-byte store carries channel 1 of one sample, both channels of the next
 * and channel 0 of the one after. The channel-1 result of the final sample
 * is stored on its own at the end.
 */
void mlib_v_ImageLookUpSI_S16_S16_2_D1(const mlib_s16 *src,
                                       mlib_s16       *dst,
                                       mlib_s32       xsize,
                                       const mlib_s16 **table)
{
  const mlib_s16 *lut0 = &table[0][32768];
  const mlib_s16 *lut1 = &table[1][32768];
  const mlib_s16 *in   = src;        /* read cursor */
  mlib_s16       *edge = dst;        /* cursor for the 2-byte edge stores */
  mlib_d64       *out;               /* 8-byte write cursor */
  mlib_d64       e0, e1, e2, e3;     /* looked-up entries */
  /* NOTE(review): pack is read by vis_faligndata before being assigned;
     presumably intentional - confirm against the VIS faligndata definition. */
  mlib_d64       pack;
  mlib_s32       carry;              /* sample whose channel 1 is still owed */
  mlib_s32       mid, next;          /* following two samples (doubled) */
  mlib_s32       k;

  vis_alignaddr((void *) 0, 6);

  /* Leading half-pixel: channel 0 of the first sample. */
  carry = *in++;
  *edge++ = lut0[carry];
  out = (mlib_d64 *) edge;
  xsize--;
  carry <<= 1;

  if (xsize >= 2) {

    mid  = (in[0] << 1);
    next = (in[1] << 1);
    in += 2;

#pragma pipeloop(0)
    for (k = 0; k <= xsize - 4; k += 2) {
      e3 = VIS_LD_U16_I(lut0, next);
      e2 = VIS_LD_U16_I(lut1, mid);
      e1 = VIS_LD_U16_I(lut0, mid);
      e0 = VIS_LD_U16_I(lut1, carry);
      pack = vis_faligndata(e3, pack);
      pack = vis_faligndata(e2, pack);
      pack = vis_faligndata(e1, pack);
      pack = vis_faligndata(e0, pack);
      carry = next;
      mid  = (in[0] << 1);
      next = (in[1] << 1);
      *out++ = pack;
      in += 2;
    }

    /* Drain the pair that was fetched but not yet stored. */
    e3 = VIS_LD_U16_I(lut0, next);
    e2 = VIS_LD_U16_I(lut1, mid);
    e1 = VIS_LD_U16_I(lut0, mid);
    e0 = VIS_LD_U16_I(lut1, carry);
    pack = vis_faligndata(e3, pack);
    pack = vis_faligndata(e2, pack);
    pack = vis_faligndata(e1, pack);
    pack = vis_faligndata(e0, pack);
    carry = next;
    *out++ = pack;
  }

  edge = (mlib_s16 *) out;

  if (xsize & 1) {
    /* One more sample: store the owed channel 1 plus its channel 0 as a
       4-byte word, and carry its channel 1 forward. */
    mid = (in[0] << 1);
    e1 = VIS_LD_U16_I(lut0, mid);
    e0 = VIS_LD_U16_I(lut1, carry);
    pack = vis_faligndata(e1, pack);
    pack = vis_faligndata(e0, pack);
    *(mlib_f32 *) out = vis_read_hi(pack);
    carry = mid;
    edge += 2;
  }

  /* Trailing half-pixel: undo the doubling and store channel 1. */
  carry >>= 1;
  *edge = lut1[carry];
}
210 
211 /***************************************************************/
/*
 * Two-channel S16 -> S16 lookup over an image of ysize rows.
 *
 *   src, slb - source data and the byte distance between source rows
 *   dst, dlb - destination data and the byte distance between destination rows
 *   xsize    - source samples per row
 *   ysize    - number of rows
 *   table    - table[0] and table[1] are the per-channel lookup tables
 *
 * For each row, when the destination is 4 or 6 bytes short of an 8-byte
 * boundary one whole pixel (two values) is written with scalar stores.
 * The rest of the row goes to the aligned kernel if the destination is now
 * 8-byte aligned, otherwise to the unaligned kernel.
 */
void mlib_v_ImageLookUpSI_S16_S16_2(const mlib_s16 *src,
                                    mlib_s32       slb,
                                    mlib_s16       *dst,
                                    mlib_s32       dlb,
                                    mlib_s32       xsize,
                                    mlib_s32       ysize,
                                    const mlib_s16 **table)
{
  const mlib_s16 *lut0    = &table[0][32768];
  const mlib_s16 *lut1    = &table[1][32768];
  const mlib_u8  *src_row = (const mlib_u8 *) src;
  mlib_u8        *dst_row = (mlib_u8 *) dst;
  mlib_s32       row;

  /* row loop */
  for (row = 0; row < ysize; row++, src_row += slb, dst_row += dlb) {
    const mlib_s16 *in   = (const mlib_s16 *) src_row;
    mlib_s16       *out  = (mlib_s16 *) dst_row;
    mlib_s32       left  = xsize;
    mlib_s32       px;
    /* bytes from out up to the next 8-byte boundary (0 when aligned) */
    mlib_s32       gap   = ((8 - ((mlib_addr) out & 7)) & 7);

    if ((gap >= 4) && (left > 0)) {
      px = *in++;
      *out++ = lut0[px];
      *out++ = lut1[px];
      left--;
    }

    if (left <= 0) {
      continue;
    }

    if (((mlib_addr) out & 7) != 0) {
      mlib_v_ImageLookUpSI_S16_S16_2_D1(in, out, left, table);
    } else {
      mlib_v_ImageLookUpSI_S16_S16_2_DstA8D1(in, out, left, table);
    }
  }
}
257 
258 /***************************************************************/
/*
 * Three-channel S16 -> S16 lookup for one row. The bulk of the row is
 * written through an mlib_d64 pointer, so dst is expected to be 8-byte
 * aligned.
 *
 *   src   - source samples
 *   dst   - destination row
 *   xsize - number of source samples to process
 *   table - table[0..2] are the per-channel lookup tables
 *
 * Samples are handled four at a time: four samples give twelve results,
 * which are emitted as three 8-byte stores. Whatever is left (fewer than
 * four samples) is finished with scalar stores.
 */
void mlib_v_ImageLookUpSI_S16_S16_3_D1(const mlib_s16 *src,
                                       mlib_s16       *dst,
                                       mlib_s32       xsize,
                                       const mlib_s16 **table)
{
  const mlib_s16 *lut0 = &table[0][32768];
  const mlib_s16 *lut1 = &table[1][32768];
  const mlib_s16 *lut2 = &table[2][32768];
  const mlib_s16 *in   = src;               /* read cursor */
  mlib_d64       *out  = (mlib_d64 *) dst;  /* 8-byte write cursor */
  mlib_s16       *tail;                     /* scalar write cursor */
  mlib_d64       e0, e1, e2, e3;            /* looked-up entries */
  /* NOTE(review): the three accumulators are read by vis_faligndata before
     being assigned; presumably intentional - confirm against the VIS
     faligndata definition. */
  mlib_d64       wordA, wordB, wordC;
  mlib_s32       p0, p1, p2, p3;            /* current four samples */
  mlib_s32       done = 0;                  /* samples accounted for so far */

  vis_alignaddr((void *) 0, 6);

  if (xsize >= 4) {

    p0 = (in[0] << 1);
    p1 = (in[1] << 1);
    p2 = (in[2] << 1);
    p3 = (in[3] << 1);
    in += 4;

#pragma pipeloop(0)
    for (done = 0; done <= xsize - 8; done += 4) {
      /* first word */
      e3 = VIS_LD_U16_I(lut0, p1);
      e2 = VIS_LD_U16_I(lut2, p0);
      e1 = VIS_LD_U16_I(lut1, p0);
      e0 = VIS_LD_U16_I(lut0, p0);
      wordA = vis_faligndata(e3, wordA);
      wordA = vis_faligndata(e2, wordA);
      wordA = vis_faligndata(e1, wordA);
      wordA = vis_faligndata(e0, wordA);
      /* second word */
      e3 = VIS_LD_U16_I(lut1, p2);
      e2 = VIS_LD_U16_I(lut0, p2);
      e1 = VIS_LD_U16_I(lut2, p1);
      e0 = VIS_LD_U16_I(lut1, p1);
      wordB = vis_faligndata(e3, wordB);
      wordB = vis_faligndata(e2, wordB);
      wordB = vis_faligndata(e1, wordB);
      wordB = vis_faligndata(e0, wordB);
      /* third word */
      e3 = VIS_LD_U16_I(lut2, p3);
      e2 = VIS_LD_U16_I(lut1, p3);
      e1 = VIS_LD_U16_I(lut0, p3);
      e0 = VIS_LD_U16_I(lut2, p2);
      wordC = vis_faligndata(e3, wordC);
      wordC = vis_faligndata(e2, wordC);
      wordC = vis_faligndata(e1, wordC);
      wordC = vis_faligndata(e0, wordC);
      /* fetch the next group before storing this one */
      p0 = (in[0] << 1);
      p1 = (in[1] << 1);
      p2 = (in[2] << 1);
      p3 = (in[3] << 1);
      *out++ = wordA;
      *out++ = wordB;
      *out++ = wordC;
      in += 4;
    }

    /* Drain the group that was fetched but not yet stored. */
    e3 = VIS_LD_U16_I(lut0, p1);
    e2 = VIS_LD_U16_I(lut2, p0);
    e1 = VIS_LD_U16_I(lut1, p0);
    e0 = VIS_LD_U16_I(lut0, p0);
    wordA = vis_faligndata(e3, wordA);
    wordA = vis_faligndata(e2, wordA);
    wordA = vis_faligndata(e1, wordA);
    wordA = vis_faligndata(e0, wordA);
    e3 = VIS_LD_U16_I(lut1, p2);
    e2 = VIS_LD_U16_I(lut0, p2);
    e1 = VIS_LD_U16_I(lut2, p1);
    e0 = VIS_LD_U16_I(lut1, p1);
    wordB = vis_faligndata(e3, wordB);
    wordB = vis_faligndata(e2, wordB);
    wordB = vis_faligndata(e1, wordB);
    wordB = vis_faligndata(e0, wordB);
    e3 = VIS_LD_U16_I(lut2, p3);
    e2 = VIS_LD_U16_I(lut1, p3);
    e1 = VIS_LD_U16_I(lut0, p3);
    e0 = VIS_LD_U16_I(lut2, p2);
    wordC = vis_faligndata(e3, wordC);
    wordC = vis_faligndata(e2, wordC);
    wordC = vis_faligndata(e1, wordC);
    wordC = vis_faligndata(e0, wordC);
    *out++ = wordA;
    *out++ = wordB;
    *out++ = wordC;
    done += 4;
  }

  tail = (mlib_s16 *) out;

  /* Remaining samples, one pixel (three values) at a time. */
#pragma pipeloop(0)
  for (; done < xsize; done++) {
    p0 = *in++;
    *tail++ = lut0[p0];
    *tail++ = lut1[p0];
    *tail++ = lut2[p0];
  }
}
367 
368 /***************************************************************/
/*
 * Three-channel S16 -> S16 lookup over an image of ysize rows.
 *
 *   src, slb - source data and the byte distance between source rows
 *   dst, dlb - destination data and the byte distance between destination rows
 *   xsize    - source samples per row
 *   ysize    - number of rows
 *   table    - table[0..2] are the per-channel lookup tables
 *
 * For each row a few leading pixels are written with scalar stores, the
 * count being derived from the low three bits of the destination address
 * (and capped at xsize); the remainder is passed to the row kernel.
 */
void mlib_v_ImageLookUpSI_S16_S16_3(const mlib_s16 *src,
                                    mlib_s32       slb,
                                    mlib_s16       *dst,
                                    mlib_s32       dlb,
                                    mlib_s32       xsize,
                                    mlib_s32       ysize,
                                    const mlib_s16 **table)
{
  const mlib_s16 *lut0    = &table[0][32768];
  const mlib_s16 *lut1    = &table[1][32768];
  const mlib_s16 *lut2    = &table[2][32768];
  const mlib_u8  *src_row = (const mlib_u8 *) src;
  mlib_u8        *dst_row = (mlib_u8 *) dst;
  mlib_s32       row, k;

  /* row loop */
  for (row = 0; row < ysize; row++, src_row += slb, dst_row += dlb) {
    const mlib_s16 *in   = (const mlib_s16 *) src_row;
    mlib_s16       *out  = (mlib_s16 *) dst_row;
    mlib_s32       left  = xsize;
    mlib_s32       px;
    /* Each pixel is 6 bytes, so writing (addr & 7) / 2 pixels brings the
       address to a multiple of 8 (for a 2-byte aligned destination). */
    mlib_s32       lead  = ((mlib_addr) out & 7) >> 1;

    if (lead > left) {
      lead = left;
    }

    for (k = lead; k > 0; k--) {
      px = *in++;
      *out++ = lut0[px];
      *out++ = lut1[px];
      *out++ = lut2[px];
      left--;
    }

    if (left > 0) {
      mlib_v_ImageLookUpSI_S16_S16_3_D1(in, out, left, table);
    }
  }
}
412 
413 /***************************************************************/
/*
 * Four-channel S16 -> S16 lookup for one row. The destination is written
 * through an mlib_d64 pointer, so dst is expected to be 8-byte aligned.
 *
 *   src   - source samples
 *   dst   - destination row
 *   xsize - number of source samples to process
 *   table - table[0..3] are the per-channel lookup tables
 *
 * Each source sample yields four results, emitted as one 8-byte store.
 */
void mlib_v_ImageLookUpSI_S16_S16_4_DstOff0_D1(const mlib_s16 *src,
                                               mlib_s16       *dst,
                                               mlib_s32       xsize,
                                               const mlib_s16 **table)
{
  const mlib_s16 *lut0 = &table[0][32768];
  const mlib_s16 *lut1 = &table[1][32768];
  const mlib_s16 *lut2 = &table[2][32768];
  const mlib_s16 *lut3 = &table[3][32768];
  const mlib_s16 *in   = src;               /* read cursor */
  mlib_d64       *out  = (mlib_d64 *) dst;  /* 8-byte write cursor */
  mlib_d64       e0, e1, e2, e3;            /* looked-up entries */
  /* NOTE(review): pack is read by vis_faligndata before being assigned;
     presumably intentional - confirm against the VIS faligndata definition. */
  mlib_d64       pack;
  mlib_s32       cur;                       /* current sample, doubled */
  mlib_s32       k;

  vis_alignaddr((void *) 0, 6);

  if (xsize >= 1) {

    cur = (*in++) << 1;

#pragma pipeloop(0)
    for (k = 0; k <= xsize - 2; k++) {
      e3 = VIS_LD_U16_I(lut3, cur);
      e2 = VIS_LD_U16_I(lut2, cur);
      e1 = VIS_LD_U16_I(lut1, cur);
      e0 = VIS_LD_U16_I(lut0, cur);
      pack = vis_faligndata(e3, pack);
      pack = vis_faligndata(e2, pack);
      pack = vis_faligndata(e1, pack);
      pack = vis_faligndata(e0, pack);
      /* fetch the next sample before storing this pixel */
      cur = (*in++) << 1;
      *out++ = pack;
    }

    /* Drain the sample that was fetched but not yet stored. */
    e3 = VIS_LD_U16_I(lut3, cur);
    e2 = VIS_LD_U16_I(lut2, cur);
    e1 = VIS_LD_U16_I(lut1, cur);
    e0 = VIS_LD_U16_I(lut0, cur);
    pack = vis_faligndata(e3, pack);
    pack = vis_faligndata(e2, pack);
    pack = vis_faligndata(e1, pack);
    pack = vis_faligndata(e0, pack);
    *out++ = pack;
  }
}
466 
467 /***************************************************************/
/*
 * Four-channel S16 -> S16 lookup for one row, for the case where the
 * caller in this file has already stored channel 0 of the first sample.
 *
 *   src   - source samples, starting at that first sample (it is re-read here)
 *   dst   - destination, written through an mlib_d64 pointer
 *   xsize - number of samples following the first one; src[0..xsize] are read
 *   table - table[0..3] are the per-channel lookup tables
 *
 * Each 8-byte store carries channels 1..3 of one sample and channel 0 of
 * the next. Channels 1..3 of the last sample are finished with scalar
 * stores.
 */
void mlib_v_ImageLookUpSI_S16_S16_4_DstOff1_D1(const mlib_s16 *src,
                                               mlib_s16       *dst,
                                               mlib_s32       xsize,
                                               const mlib_s16 **table)
{
  const mlib_s16 *lut0 = &table[0][32768];
  const mlib_s16 *lut1 = &table[1][32768];
  const mlib_s16 *lut2 = &table[2][32768];
  const mlib_s16 *lut3 = &table[3][32768];
  const mlib_s16 *in   = src;               /* read cursor */
  mlib_d64       *out  = (mlib_d64 *) dst;  /* 8-byte write cursor */
  mlib_s16       *tail;                     /* scalar write cursor */
  mlib_d64       e0, e1, e2, e3;            /* looked-up entries */
  /* NOTE(review): pack is read by vis_faligndata before being assigned;
     presumably intentional - confirm against the VIS faligndata definition. */
  mlib_d64       pack;
  mlib_s32       cur, next;                 /* partially emitted sample and its successor */
  mlib_s32       k;

  vis_alignaddr((void *) 0, 6);

  cur = (*in++) << 1;

  if (xsize >= 1) {

    next = (*in++) << 1;

#pragma pipeloop(0)
    for (k = 0; k <= xsize - 2; k++) {
      e3 = VIS_LD_U16_I(lut0, next);
      e2 = VIS_LD_U16_I(lut3, cur);
      e1 = VIS_LD_U16_I(lut2, cur);
      e0 = VIS_LD_U16_I(lut1, cur);
      pack = vis_faligndata(e3, pack);
      pack = vis_faligndata(e2, pack);
      pack = vis_faligndata(e1, pack);
      pack = vis_faligndata(e0, pack);
      cur  = next;
      next = (*in++) << 1;
      *out++ = pack;
    }

    /* Drain the pair that was fetched but not yet stored. */
    e3 = VIS_LD_U16_I(lut0, next);
    e2 = VIS_LD_U16_I(lut3, cur);
    e1 = VIS_LD_U16_I(lut2, cur);
    e0 = VIS_LD_U16_I(lut1, cur);
    pack = vis_faligndata(e3, pack);
    pack = vis_faligndata(e2, pack);
    pack = vis_faligndata(e1, pack);
    pack = vis_faligndata(e0, pack);
    cur = next;
    *out++ = pack;
  }

  /* Finish the last sample: undo the doubling, then channels 1..3. */
  tail = (mlib_s16 *) out;
  cur >>= 1;

  tail[0] = lut1[cur];
  tail[1] = lut2[cur];
  tail[2] = lut3[cur];
}
531 
532 /***************************************************************/
/*
 * Four-channel S16 -> S16 lookup for one row, for the case where the
 * caller in this file has already stored channels 0 and 1 of the first
 * sample.
 *
 *   src   - source samples, starting at that first sample (it is re-read here)
 *   dst   - destination, written through an mlib_d64 pointer
 *   xsize - number of samples following the first one; src[0..xsize] are read
 *   table - table[0..3] are the per-channel lookup tables
 *
 * Each 8-byte store carries channels 2..3 of one sample and channels 0..1
 * of the next. Channels 2..3 of the last sample are finished with scalar
 * stores.
 */
void mlib_v_ImageLookUpSI_S16_S16_4_DstOff2_D1(const mlib_s16 *src,
                                               mlib_s16       *dst,
                                               mlib_s32       xsize,
                                               const mlib_s16 **table)
{
  const mlib_s16 *lut0 = &table[0][32768];
  const mlib_s16 *lut1 = &table[1][32768];
  const mlib_s16 *lut2 = &table[2][32768];
  const mlib_s16 *lut3 = &table[3][32768];
  const mlib_s16 *in   = src;               /* read cursor */
  mlib_d64       *out  = (mlib_d64 *) dst;  /* 8-byte write cursor */
  mlib_s16       *tail;                     /* scalar write cursor */
  mlib_d64       e0, e1, e2, e3;            /* looked-up entries */
  /* NOTE(review): pack is read by vis_faligndata before being assigned;
     presumably intentional - confirm against the VIS faligndata definition. */
  mlib_d64       pack;
  mlib_s32       cur, next;                 /* partially emitted sample and its successor */
  mlib_s32       k;

  vis_alignaddr((void *) 0, 6);

  cur = (*in++) << 1;

  if (xsize >= 1) {

    next = (*in++) << 1;

#pragma pipeloop(0)
    for (k = 0; k <= xsize - 2; k++) {
      e3 = VIS_LD_U16_I(lut1, next);
      e2 = VIS_LD_U16_I(lut0, next);
      e1 = VIS_LD_U16_I(lut3, cur);
      e0 = VIS_LD_U16_I(lut2, cur);
      pack = vis_faligndata(e3, pack);
      pack = vis_faligndata(e2, pack);
      pack = vis_faligndata(e1, pack);
      pack = vis_faligndata(e0, pack);
      cur  = next;
      next = (*in++) << 1;
      *out++ = pack;
    }

    /* Drain the pair that was fetched but not yet stored. */
    e3 = VIS_LD_U16_I(lut1, next);
    e2 = VIS_LD_U16_I(lut0, next);
    e1 = VIS_LD_U16_I(lut3, cur);
    e0 = VIS_LD_U16_I(lut2, cur);
    pack = vis_faligndata(e3, pack);
    pack = vis_faligndata(e2, pack);
    pack = vis_faligndata(e1, pack);
    pack = vis_faligndata(e0, pack);
    cur = next;
    *out++ = pack;
  }

  /* Finish the last sample: undo the doubling, then channels 2..3. */
  tail = (mlib_s16 *) out;
  cur >>= 1;

  tail[0] = lut2[cur];
  tail[1] = lut3[cur];
}
595 
596 /***************************************************************/
/*
 * Four-channel S16 -> S16 lookup for one row, for the case where the
 * caller in this file has already stored channels 0..2 of the first
 * sample.
 *
 *   src   - source samples, starting at that first sample (it is re-read here)
 *   dst   - destination, written through an mlib_d64 pointer
 *   xsize - number of samples following the first one; src[0..xsize] are read
 *   table - table[0..3] are the per-channel lookup tables
 *
 * Each 8-byte store carries channel 3 of one sample and channels 0..2 of
 * the next. Channel 3 of the last sample is finished with a scalar store.
 */
void mlib_v_ImageLookUpSI_S16_S16_4_DstOff3_D1(const mlib_s16 *src,
                                               mlib_s16       *dst,
                                               mlib_s32       xsize,
                                               const mlib_s16 **table)
{
  const mlib_s16 *lut0 = &table[0][32768];
  const mlib_s16 *lut1 = &table[1][32768];
  const mlib_s16 *lut2 = &table[2][32768];
  const mlib_s16 *lut3 = &table[3][32768];
  const mlib_s16 *in   = src;               /* read cursor */
  mlib_d64       *out  = (mlib_d64 *) dst;  /* 8-byte write cursor */
  mlib_s16       *tail;                     /* scalar write cursor */
  mlib_d64       e0, e1, e2, e3;            /* looked-up entries */
  /* NOTE(review): pack is read by vis_faligndata before being assigned;
     presumably intentional - confirm against the VIS faligndata definition. */
  mlib_d64       pack;
  mlib_s32       cur, next;                 /* partially emitted sample and its successor */
  mlib_s32       k;

  vis_alignaddr((void *) 0, 6);

  cur = (*in++) << 1;

  if (xsize >= 1) {

    next = (*in++) << 1;

#pragma pipeloop(0)
    for (k = 0; k <= xsize - 2; k++) {
      e3 = VIS_LD_U16_I(lut2, next);
      e2 = VIS_LD_U16_I(lut1, next);
      e1 = VIS_LD_U16_I(lut0, next);
      e0 = VIS_LD_U16_I(lut3, cur);
      pack = vis_faligndata(e3, pack);
      pack = vis_faligndata(e2, pack);
      pack = vis_faligndata(e1, pack);
      pack = vis_faligndata(e0, pack);
      cur  = next;
      next = (*in++) << 1;
      *out++ = pack;
    }

    /* Drain the pair that was fetched but not yet stored. */
    e3 = VIS_LD_U16_I(lut2, next);
    e2 = VIS_LD_U16_I(lut1, next);
    e1 = VIS_LD_U16_I(lut0, next);
    e0 = VIS_LD_U16_I(lut3, cur);
    pack = vis_faligndata(e3, pack);
    pack = vis_faligndata(e2, pack);
    pack = vis_faligndata(e1, pack);
    pack = vis_faligndata(e0, pack);
    cur = next;
    *out++ = pack;
  }

  /* Finish the last sample: undo the doubling, then channel 3. */
  tail = (mlib_s16 *) out;
  cur >>= 1;

  tail[0] = lut3[cur];
}
658 
659 /***************************************************************/
/*
 * Four-channel S16 -> S16 lookup over an image of ysize rows.
 *
 *   src, slb - source data and the byte distance between source rows
 *   dst, dlb - destination data and the byte distance between destination rows
 *   xsize    - source samples per row
 *   ysize    - number of rows
 *   table    - table[0..3] are the per-channel lookup tables
 *
 * For each non-empty row the number of 16-bit values (0..3) between the
 * destination pointer and the next 8-byte boundary selects the kernel.
 * For a non-zero count, that many channels of the first sample are stored
 * here with scalar stores and the matching DstOffN kernel is called with
 * the sample count reduced by one. The source pointer is deliberately not
 * advanced: the kernel re-reads the first sample to emit its remaining
 * channels.
 */
void mlib_v_ImageLookUpSI_S16_S16_4(const mlib_s16 *src,
                                    mlib_s32       slb,
                                    mlib_s16       *dst,
                                    mlib_s32       dlb,
                                    mlib_s32       xsize,
                                    mlib_s32       ysize,
                                    const mlib_s16 **table)
{
  const mlib_s16 *lut0    = &table[0][32768];
  const mlib_s16 *lut1    = &table[1][32768];
  const mlib_s16 *lut2    = &table[2][32768];
  const mlib_u8  *src_row = (const mlib_u8 *) src;
  mlib_u8        *dst_row = (mlib_u8 *) dst;
  mlib_s32       row;

  /* row loop */
  for (row = 0; row < ysize; row++, src_row += slb, dst_row += dlb) {
    const mlib_s16 *in   = (const mlib_s16 *) src_row;
    mlib_s16       *out  = (mlib_s16 *) dst_row;
    mlib_s32       left  = xsize;
    mlib_s32       lead, px;

    if (left <= 0) {
      continue;
    }

    /* 16-bit slots up to the next 8-byte boundary */
    lead = ((8 - ((mlib_addr) out & 7)) & 7) >> 1;

    switch (lead) {
      case 0:
        mlib_v_ImageLookUpSI_S16_S16_4_DstOff0_D1(in, out, left, table);
        break;

      case 1:
        px = *in;
        *out++ = lut0[px];
        left--;
        mlib_v_ImageLookUpSI_S16_S16_4_DstOff1_D1(in, out, left, table);
        break;

      case 2:
        px = *in;
        *out++ = lut0[px];
        *out++ = lut1[px];
        left--;
        mlib_v_ImageLookUpSI_S16_S16_4_DstOff2_D1(in, out, left, table);
        break;

      case 3:
        px = *in;
        *out++ = lut0[px];
        *out++ = lut1[px];
        *out++ = lut2[px];
        left--;
        mlib_v_ImageLookUpSI_S16_S16_4_DstOff3_D1(in, out, left, table);
        break;

      default:
        /* lead is always in 0..3 */
        break;
    }
  }
}
714 
715 /***************************************************************/
716