1 /*
2  * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 
27 
28 #include "vis_proto.h"
29 #include "mlib_image.h"
30 #include "mlib_v_ImageLookUpFunc.h"
31 
32 /***************************************************************/
33 static void mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(const mlib_u8  *src,
34                                                      mlib_s16       *dst,
35                                                      mlib_s32       xsize,
36                                                      const mlib_s16 *table0,
37                                                      const mlib_s16 *table1,
38                                                      const mlib_s16 *table2,
39                                                      const mlib_s16 *table3);
40 
41 static void mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(const mlib_u8  *src,
42                                                      mlib_s16       *dst,
43                                                      mlib_s32       xsize,
44                                                      const mlib_s16 *table0,
45                                                      const mlib_s16 *table1,
46                                                      const mlib_s16 *table2,
47                                                      const mlib_s16 *table3);
48 
49 static void mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(const mlib_u8  *src,
50                                                      mlib_s16       *dst,
51                                                      mlib_s32       xsize,
52                                                      const mlib_s16 *table0,
53                                                      const mlib_s16 *table1,
54                                                      const mlib_s16 *table2,
55                                                      const mlib_s16 *table3);
56 
57 static void mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(const mlib_u8  *src,
58                                                      mlib_s16       *dst,
59                                                      mlib_s32       xsize,
60                                                      const mlib_s16 *table0,
61                                                      const mlib_s16 *table1,
62                                                      const mlib_s16 *table2,
63                                                      const mlib_s16 *table3);
64 
65 static void mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(const mlib_u8  *src,
66                                                    mlib_s16       *dst,
67                                                    mlib_s32       xsize,
68                                                    const mlib_s16 *table0,
69                                                    const mlib_s16 *table1,
70                                                    const mlib_s16 *table2);
71 
72 static void mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(const mlib_u8  *src,
73                                                    mlib_s16       *dst,
74                                                    mlib_s32       xsize,
75                                                    const mlib_s16 *table0,
76                                                    const mlib_s16 *table1,
77                                                    const mlib_s16 *table2);
78 
79 static void mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(const mlib_u8  *src,
80                                                    mlib_s16       *dst,
81                                                    mlib_s32       xsize,
82                                                    const mlib_s16 *table0,
83                                                    const mlib_s16 *table1,
84                                                    const mlib_s16 *table2);
85 
86 static void mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(const mlib_u8  *src,
87                                                    mlib_s16       *dst,
88                                                    mlib_s32       xsize,
89                                                    const mlib_s16 *table0,
90                                                    const mlib_s16 *table1,
91                                                    const mlib_s16 *table2);
92 
93 /***************************************************************/
/* Thin wrapper over vis_ld_u16_i(): casts the table pointer to void * and
 * forwards the offset unchanged.  Callers in this file pass 2 * index as
 * the offset, i.e. a byte offset into a table of 16-bit entries. */
#define VIS_LD_U16_I(TABLE, BYTE_OFF) \
  vis_ld_u16_i((void *)(TABLE), (BYTE_OFF))
95 
96 /***************************************************************/
/*
 * Single-row U8 -> S16 table lookup for a source pointer with
 * (address & 3) == 0 (that is the case for which the entry points in this
 * file select this kernel).
 *
 *   src            source samples, read one 32-bit word at a time
 *   dst            destination samples, written through mlib_d64 stores
 *                  (the entry points in this file advance dst to an
 *                  8-byte boundary before calling)
 *   xsize          number of samples to convert
 *   table0..3      lookup tables; within every group of four samples the
 *                  byte taken from bits 31..24 of the source word uses
 *                  table0, bits 23..16 table1, bits 15..8 table2 and
 *                  bits 7..0 table3
 *                  NOTE(review): assumes a big-endian target so that bits
 *                  31..24 are the first byte in memory -- confirm.
 *
 * The main loop is software-pipelined: the word for the next group is
 * loaded before the current group is stored, and the last full group is
 * finished after the loop.
 */
void mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(const mlib_u8  *src,
                                              mlib_s16       *dst,
                                              mlib_s32       xsize,
                                              const mlib_s16 *table0,
                                              const mlib_s16 *table1,
                                              const mlib_s16 *table2,
                                              const mlib_s16 *table3)
{
  mlib_u32 *sa;                        /* word cursor over the source */
  mlib_u8 *sp;                         /* byte cursor used for the tail */
  mlib_u32 s0;                         /* current source word / tail byte */
  mlib_s16 *dend;                      /* last destination element */
  mlib_d64 *dp;                        /* 8-byte cursor over the destination */
  mlib_d64 t0, t1, t2, t3;             /* looked-up table entries */
  /* acc0 is fed back into vis_faligndata() before it is ever assigned;
   * give it a defined value so that no indeterminate object is read. */
  mlib_d64 acc0 = 0.0;                 /* four packed results */
  mlib_s32 emask;                      /* edge mask for the partial store */
  mlib_s32 i, num;                     /* loop counter, tail length */

  sa = (mlib_u32 *) src;
  dp = (mlib_d64 *) dst;
  dend = dst + xsize - 1;

  /* NOTE(review): presumably sets the alignment offset so that each
   * vis_faligndata(t, acc0) shifts one 16-bit entry into acc0 -- confirm
   * against the VIS documentation. */
  vis_alignaddr((void *)0, 6);

  if (xsize >= 4) {

    s0 = *sa++;

    /* (s0 >> k) & 0x1FE is 2 * (one byte of s0): a byte offset into a
     * table of 16-bit entries.  Entries are merged last-sample-first. */
#pragma pipeloop(0)
    for (i = 0; i <= xsize - 8; i += 4) {
      t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
      t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
      t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
      t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
      acc0 = vis_faligndata(t3, acc0);
      acc0 = vis_faligndata(t2, acc0);
      acc0 = vis_faligndata(t1, acc0);
      acc0 = vis_faligndata(t0, acc0);
      s0 = *sa++;
      *dp++ = acc0;
    }

    /* Last full group: its source word was already loaded above. */
    t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    *dp++ = acc0;
  }

  /* sa now points at the first source byte not yet converted. */
  sp = (mlib_u8 *) sa;

  if ((mlib_addr) dp <= (mlib_addr) dend) {

    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
    sp += num;                         /* last remaining source byte */
    num++;                             /* number of remaining samples */

    /* Walk the remaining bytes backwards; each case handles one sample
     * and falls through to the sample before it. */
    switch (num) {
      case 3:
        s0 = *sp--;
        t0 = VIS_LD_U16_I(table2, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        /* fallthrough */
      case 2:
        s0 = *sp--;
        t0 = VIS_LD_U16_I(table1, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        /* fallthrough */
      case 1:
        s0 = *sp;
        t0 = VIS_LD_U16_I(table0, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        break;
      default:
        break;
    }

    /* NOTE(review): presumably the mask limits the store to the elements
     * from dp through dend -- confirm against the VIS documentation. */
    emask = vis_edge16(dp, dend);
    vis_pst_16(acc0, dp, emask);
  }
}
206 
207 /***************************************************************/
/*
 * Single-row U8 -> S16 table lookup for a source pointer with
 * (address & 3) == 1 (that is the case for which the entry points in this
 * file select this kernel).
 *
 *   src            source samples; words are read starting at src - 1
 *   dst            destination samples, written through mlib_d64 stores
 *                  (the entry points in this file advance dst to an
 *                  8-byte boundary before calling)
 *   xsize          number of samples to convert
 *   table0..3      lookup tables; each group of four samples takes bits
 *                  23..16, 15..8 and 7..0 of one word (table0..table2)
 *                  and bits 31..24 of the following word (table3)
 *                  NOTE(review): assumes a big-endian target so that this
 *                  bit order matches memory order -- confirm.
 *
 * The main loop is software-pipelined: the following word is loaded
 * before the current group is stored, and the last full group is
 * finished after the loop.
 */
void mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(const mlib_u8  *src,
                                              mlib_s16       *dst,
                                              mlib_s32       xsize,
                                              const mlib_s16 *table0,
                                              const mlib_s16 *table1,
                                              const mlib_s16 *table2,
                                              const mlib_s16 *table3)
{
  mlib_u32 *sa;                        /* word cursor over the source */
  mlib_u8 *sp;                         /* byte cursor used for the tail */
  mlib_u32 s0, s1;                     /* current and following source word */
  mlib_s16 *dend;                      /* last destination element */
  mlib_d64 *dp;                        /* 8-byte cursor over the destination */
  mlib_d64 t0, t1, t2, t3;             /* looked-up table entries */
  /* acc0 is fed back into vis_faligndata() before it is ever assigned;
   * give it a defined value so that no indeterminate object is read. */
  mlib_d64 acc0 = 0.0;                 /* four packed results */
  mlib_s32 emask;                      /* edge mask for the partial store */
  mlib_s32 i, num;                     /* loop counter, tail length */

  sa = (mlib_u32 *) (src - 1);
  dp = (mlib_d64 *) dst;
  dend = dst + xsize - 1;

  /* NOTE(review): presumably sets the alignment offset so that each
   * vis_faligndata(t, acc0) shifts one 16-bit entry into acc0 -- confirm
   * against the VIS documentation. */
  vis_alignaddr((void *)0, 6);

  s0 = *sa++;

  if (xsize >= 4) {

    s1 = *sa++;

    /* (s >> k) & 0x1FE is 2 * (one byte of s): a byte offset into a
     * table of 16-bit entries.  Entries are merged last-sample-first. */
#pragma pipeloop(0)
    for (i = 0; i <= xsize - 8; i += 4) {
      t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
      t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
      t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
      t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
      acc0 = vis_faligndata(t3, acc0);
      acc0 = vis_faligndata(t2, acc0);
      acc0 = vis_faligndata(t1, acc0);
      acc0 = vis_faligndata(t0, acc0);
      s0 = s1;
      s1 = *sa++;
      *dp++ = acc0;
    }

    /* Last full group: both of its source words are already loaded. */
    t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    *dp++ = acc0;
  }

  /* The word cursor is 3 bytes past the first source byte not yet
   * converted. */
  sp = (mlib_u8 *) sa - 3;

  if ((mlib_addr) dp <= (mlib_addr) dend) {

    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
    sp += num;                         /* last remaining source byte */
    num++;                             /* number of remaining samples */

    /* Walk the remaining bytes backwards; each case handles one sample
     * and falls through to the sample before it. */
    switch (num) {
      case 3:
        s0 = *sp--;
        t0 = VIS_LD_U16_I(table2, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        /* fallthrough */
      case 2:
        s0 = *sp--;
        t0 = VIS_LD_U16_I(table1, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        /* fallthrough */
      case 1:
        s0 = *sp;
        t0 = VIS_LD_U16_I(table0, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        break;
      default:
        break;
    }

    /* NOTE(review): presumably the mask limits the store to the elements
     * from dp through dend -- confirm against the VIS documentation. */
    emask = vis_edge16(dp, dend);
    vis_pst_16(acc0, dp, emask);
  }
}
320 
321 /***************************************************************/
/*
 * Single-row U8 -> S16 table lookup for a source pointer with
 * (address & 3) == 2 (that is the case for which the entry points in this
 * file select this kernel).
 *
 *   src            source samples; words are read starting at src - 2
 *   dst            destination samples, written through mlib_d64 stores
 *                  (the entry points in this file advance dst to an
 *                  8-byte boundary before calling)
 *   xsize          number of samples to convert
 *   table0..3      lookup tables; each group of four samples takes bits
 *                  15..8 and 7..0 of one word (table0, table1) and bits
 *                  31..24 and 23..16 of the following word (table2,
 *                  table3)
 *                  NOTE(review): assumes a big-endian target so that this
 *                  bit order matches memory order -- confirm.
 *
 * The main loop is software-pipelined: the following word is loaded
 * before the current group is stored, and the last full group is
 * finished after the loop.
 */
void mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(const mlib_u8  *src,
                                              mlib_s16       *dst,
                                              mlib_s32       xsize,
                                              const mlib_s16 *table0,
                                              const mlib_s16 *table1,
                                              const mlib_s16 *table2,
                                              const mlib_s16 *table3)
{
  mlib_u32 *sa;                        /* word cursor over the source */
  mlib_u8 *sp;                         /* byte cursor used for the tail */
  mlib_u32 s0, s1;                     /* current and following source word */
  mlib_s16 *dend;                      /* last destination element */
  mlib_d64 *dp;                        /* 8-byte cursor over the destination */
  mlib_d64 t0, t1, t2, t3;             /* looked-up table entries */
  /* acc0 is fed back into vis_faligndata() before it is ever assigned;
   * give it a defined value so that no indeterminate object is read. */
  mlib_d64 acc0 = 0.0;                 /* four packed results */
  mlib_s32 emask;                      /* edge mask for the partial store */
  mlib_s32 i, num;                     /* loop counter, tail length */

  sa = (mlib_u32 *) (src - 2);
  dp = (mlib_d64 *) dst;
  dend = dst + xsize - 1;

  /* NOTE(review): presumably sets the alignment offset so that each
   * vis_faligndata(t, acc0) shifts one 16-bit entry into acc0 -- confirm
   * against the VIS documentation. */
  vis_alignaddr((void *)0, 6);

  s0 = *sa++;

  if (xsize >= 4) {

    s1 = *sa++;

    /* (s >> k) & 0x1FE is 2 * (one byte of s): a byte offset into a
     * table of 16-bit entries.  Entries are merged last-sample-first. */
#pragma pipeloop(0)
    for (i = 0; i <= xsize - 8; i += 4) {
      t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
      t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
      t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
      t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
      acc0 = vis_faligndata(t3, acc0);
      acc0 = vis_faligndata(t2, acc0);
      acc0 = vis_faligndata(t1, acc0);
      acc0 = vis_faligndata(t0, acc0);
      s0 = s1;
      s1 = *sa++;
      *dp++ = acc0;
    }

    /* Last full group: both of its source words are already loaded. */
    t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    *dp++ = acc0;
  }

  /* The word cursor is 2 bytes past the first source byte not yet
   * converted. */
  sp = (mlib_u8 *) sa - 2;

  if ((mlib_addr) dp <= (mlib_addr) dend) {

    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
    sp += num;                         /* last remaining source byte */
    num++;                             /* number of remaining samples */

    /* Walk the remaining bytes backwards; each case handles one sample
     * and falls through to the sample before it. */
    switch (num) {
      case 3:
        s0 = *sp--;
        t0 = VIS_LD_U16_I(table2, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        /* fallthrough */
      case 2:
        s0 = *sp--;
        t0 = VIS_LD_U16_I(table1, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        /* fallthrough */
      case 1:
        s0 = *sp;
        t0 = VIS_LD_U16_I(table0, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        break;
      default:
        break;
    }

    /* NOTE(review): presumably the mask limits the store to the elements
     * from dp through dend -- confirm against the VIS documentation. */
    emask = vis_edge16(dp, dend);
    vis_pst_16(acc0, dp, emask);
  }
}
434 
435 /***************************************************************/
/*
 * Single-row U8 -> S16 table lookup for a source pointer with
 * (address & 3) == 3 (that is the case for which the entry points in this
 * file select this kernel).
 *
 *   src            source samples; words are read starting at src - 3
 *   dst            destination samples, written through mlib_d64 stores
 *                  (the entry points in this file advance dst to an
 *                  8-byte boundary before calling)
 *   xsize          number of samples to convert
 *   table0..3      lookup tables; each group of four samples takes bits
 *                  7..0 of one word (table0) and bits 31..24, 23..16 and
 *                  15..8 of the following word (table1..table3)
 *                  NOTE(review): assumes a big-endian target so that this
 *                  bit order matches memory order -- confirm.
 *
 * The main loop is software-pipelined: the following word is loaded
 * before the current group is stored, and the last full group is
 * finished after the loop.
 */
void mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(const mlib_u8  *src,
                                              mlib_s16       *dst,
                                              mlib_s32       xsize,
                                              const mlib_s16 *table0,
                                              const mlib_s16 *table1,
                                              const mlib_s16 *table2,
                                              const mlib_s16 *table3)
{
  mlib_u32 *sa;                        /* word cursor over the source */
  mlib_u8 *sp;                         /* byte cursor used for the tail */
  mlib_u32 s0, s1;                     /* current and following source word */
  mlib_s16 *dend;                      /* last destination element */
  mlib_d64 *dp;                        /* 8-byte cursor over the destination */
  mlib_d64 t0, t1, t2, t3;             /* looked-up table entries */
  /* acc0 is fed back into vis_faligndata() before it is ever assigned;
   * give it a defined value so that no indeterminate object is read. */
  mlib_d64 acc0 = 0.0;                 /* four packed results */
  mlib_s32 emask;                      /* edge mask for the partial store */
  mlib_s32 i, num;                     /* loop counter, tail length */

  sa = (mlib_u32 *) (src - 3);
  dp = (mlib_d64 *) dst;
  dend = dst + xsize - 1;

  /* NOTE(review): presumably sets the alignment offset so that each
   * vis_faligndata(t, acc0) shifts one 16-bit entry into acc0 -- confirm
   * against the VIS documentation. */
  vis_alignaddr((void *)0, 6);

  s0 = *sa++;

  if (xsize >= 4) {

    s1 = *sa++;

    /* (s >> k) & 0x1FE is 2 * (one byte of s): a byte offset into a
     * table of 16-bit entries.  Entries are merged last-sample-first. */
#pragma pipeloop(0)
    for (i = 0; i <= xsize - 8; i += 4) {
      t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
      t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
      t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
      t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
      acc0 = vis_faligndata(t3, acc0);
      acc0 = vis_faligndata(t2, acc0);
      acc0 = vis_faligndata(t1, acc0);
      acc0 = vis_faligndata(t0, acc0);
      s0 = s1;
      s1 = *sa++;
      *dp++ = acc0;
    }

    /* Last full group: both of its source words are already loaded. */
    t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    *dp++ = acc0;
  }

  /* The word cursor is 1 byte past the first source byte not yet
   * converted. */
  sp = (mlib_u8 *) sa - 1;

  if ((mlib_addr) dp <= (mlib_addr) dend) {

    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
    sp += num;                         /* last remaining source byte */
    num++;                             /* number of remaining samples */

    /* Walk the remaining bytes backwards; each case handles one sample
     * and falls through to the sample before it. */
    switch (num) {
      case 3:
        s0 = *sp--;
        t0 = VIS_LD_U16_I(table2, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        /* fallthrough */
      case 2:
        s0 = *sp--;
        t0 = VIS_LD_U16_I(table1, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        /* fallthrough */
      case 1:
        s0 = *sp;
        t0 = VIS_LD_U16_I(table0, 2 * s0);
        acc0 = vis_faligndata(t0, acc0);
        break;
      default:
        break;
    }

    /* NOTE(review): presumably the mask limits the store to the elements
     * from dp through dend -- confirm against the VIS documentation. */
    emask = vis_edge16(dp, dend);
    vis_pst_16(acc0, dp, emask);
  }
}
548 
549 /***************************************************************/
/*
 * One-channel U8 -> S16 lookup over a ysize-row image.
 *
 *   src, slb       first source row and the byte step between source rows
 *   dst, dlb       first destination row and the byte step between rows
 *   xsize, ysize   samples per row and number of rows
 *   table          table[0] is the lookup table used for every sample
 *
 * For each row, samples are converted one at a time until the destination
 * pointer reaches an 8-byte boundary; the rest of the row is handed to the
 * four-table kernel that matches the low two bits of the source address,
 * with the same table passed in all four slots.
 */
void mlib_v_ImageLookUp_U8_S16_1(const mlib_u8  *src,
                                 mlib_s32       slb,
                                 mlib_s16       *dst,
                                 mlib_s32       dlb,
                                 mlib_s32       xsize,
                                 mlib_s32       ysize,
                                 const mlib_s16 **table)
{
  const mlib_s16 *lut = table[0];
  mlib_u8 *src_row = (void *)src;
  mlib_s16 *dst_row = dst;
  mlib_s32 row;

  for (row = 0; row < ysize; row++) {
    mlib_u8 *sp = src_row;
    mlib_s16 *dp = dst_row;
    mlib_s32 left = xsize;
    mlib_s32 head;

    /* 16-bit elements needed to reach the next 8-byte boundary,
     * capped at the row length */
    head = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
    if (head > left) {
      head = left;
    }

    while (head-- > 0) {
      *dp++ = lut[(*sp++)];
      left--;
    }

    if (left > 0) {
      switch ((mlib_s32) ((mlib_addr) sp & 3)) {
        case 0:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, left,
                                                   lut, lut, lut, lut);
          break;
        case 1:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, left,
                                                   lut, lut, lut, lut);
          break;
        case 2:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, left,
                                                   lut, lut, lut, lut);
          break;
        default:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, left,
                                                   lut, lut, lut, lut);
          break;
      }
    }

    /* both strides are expressed in bytes */
    src_row += slb;
    dst_row = (mlib_s16 *) ((mlib_u8 *) dst_row + dlb);
  }
}
607 
608 /***************************************************************/
/*
 * Two-channel U8 -> S16 lookup over a ysize-row image.
 *
 *   src, slb       first source row and the byte step between source rows
 *   dst, dlb       first destination row and the byte step between rows
 *   xsize, ysize   pixels per row (2 samples each) and number of rows
 *   table          table[0] and table[1] are applied to alternating
 *                  samples, starting with table[0]
 *
 * For each row, samples are converted one at a time until the destination
 * pointer reaches an 8-byte boundary.  If an odd number of samples was
 * consumed, the two tables trade places so that the kernel's first table
 * matches the next sample.  The rest of the row goes to the four-table
 * kernel that matches the low two bits of the source address.
 */
void mlib_v_ImageLookUp_U8_S16_2(const mlib_u8  *src,
                                 mlib_s32       slb,
                                 mlib_s16       *dst,
                                 mlib_s32       dlb,
                                 mlib_s32       xsize,
                                 mlib_s32       ysize,
                                 const mlib_s16 **table)
{
  mlib_u8 *src_row = (void *)src;
  mlib_s16 *dst_row = dst;
  mlib_s32 row;

  for (row = 0; row < ysize; row++) {
    mlib_u8 *sp = src_row;
    mlib_s16 *dp = dst_row;
    const mlib_s16 *first = table[0];
    const mlib_s16 *second = table[1];
    mlib_s32 left = xsize * 2;
    mlib_s32 head;

    /* 16-bit elements needed to reach the next 8-byte boundary (at most
     * 3), capped at the row length */
    head = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
    if (head > left) {
      head = left;
    }

    /* head never exceeds 3, so at most one whole pair precedes the
     * boundary */
    if (head >= 2) {
      *dp++ = first[(*sp++)];
      *dp++ = second[(*sp++)];
      left -= 2;
    }

    if ((head & 1) != 0) {
      const mlib_s16 *swap = first;

      *dp++ = first[(*sp++)];
      left--;
      first = second;
      second = swap;
    }

    if (left > 0) {
      switch ((mlib_s32) ((mlib_addr) sp & 3)) {
        case 0:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, left,
                                                   first, second,
                                                   first, second);
          break;
        case 1:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, left,
                                                   first, second,
                                                   first, second);
          break;
        case 2:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, left,
                                                   first, second,
                                                   first, second);
          break;
        default:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, left,
                                                   first, second,
                                                   first, second);
          break;
      }
    }

    /* both strides are expressed in bytes */
    src_row += slb;
    dst_row = (mlib_s16 *) ((mlib_u8 *) dst_row + dlb);
  }
}
677 
678 /***************************************************************/
/*
 * Four-channel U8 -> S16 lookup over a ysize-row image.
 *
 *   src, slb       first source row and the byte step between source rows
 *   dst, dlb       first destination row and the byte step between rows
 *   xsize, ysize   pixels per row (4 samples each) and number of rows
 *   table          table[0..3] are applied to successive samples in
 *                  rotation, starting with table[0]
 *
 * For each row, samples are converted one at a time until the destination
 * pointer reaches an 8-byte boundary.  The tables are then rotated by the
 * number of samples consumed, so that the kernel's first table matches the
 * next sample.  The rest of the row goes to the four-table kernel that
 * matches the low two bits of the source address.
 */
void mlib_v_ImageLookUp_U8_S16_4(const mlib_u8  *src,
                                 mlib_s32       slb,
                                 mlib_s16       *dst,
                                 mlib_s32       dlb,
                                 mlib_s32       xsize,
                                 mlib_s32       ysize,
                                 const mlib_s16 **table)
{
  mlib_u8 *src_row = (void *)src;
  mlib_s16 *dst_row = dst;
  mlib_s32 row;

  for (row = 0; row < ysize; row++) {
    mlib_u8 *sp = src_row;
    mlib_s16 *dp = dst_row;
    const mlib_s16 *chan[4];
    mlib_s32 left = xsize * 4;
    mlib_s32 head, k;

    chan[0] = table[0];
    chan[1] = table[1];
    chan[2] = table[2];
    chan[3] = table[3];

    /* 16-bit elements needed to reach the next 8-byte boundary (at most
     * 3), capped at the row length */
    head = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
    if (head > left) {
      head = left;
    }

    for (k = 0; k < head; k++) {
      *dp++ = chan[k][(*sp++)];
      left--;
    }

    if (left > 0) {
      /* left > 0 implies 0 <= head <= 3; the next sample belongs to
       * channel `head`, so rotate the tables by that amount */
      const mlib_s16 *r0 = chan[head & 3];
      const mlib_s16 *r1 = chan[(head + 1) & 3];
      const mlib_s16 *r2 = chan[(head + 2) & 3];
      const mlib_s16 *r3 = chan[(head + 3) & 3];

      switch ((mlib_s32) ((mlib_addr) sp & 3)) {
        case 0:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, left,
                                                   r0, r1, r2, r3);
          break;
        case 1:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, left,
                                                   r0, r1, r2, r3);
          break;
        case 2:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, left,
                                                   r0, r1, r2, r3);
          break;
        default:
          mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, left,
                                                   r0, r1, r2, r3);
          break;
      }
    }

    /* both strides are expressed in bytes */
    src_row += slb;
    dst_row = (mlib_s16 *) ((mlib_u8 *) dst_row + dlb);
  }
}
767 
768 /***************************************************************/
mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(const mlib_u8 * src,mlib_s16 * dst,mlib_s32 xsize,const mlib_s16 * table0,const mlib_s16 * table1,const mlib_s16 * table2)769 void mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(const mlib_u8  *src,
770                                             mlib_s16       *dst,
771                                             mlib_s32       xsize,
772                                             const mlib_s16 *table0,
773                                             const mlib_s16 *table1,
774                                             const mlib_s16 *table2)
775 {
776   mlib_u32 *sa;                        /* aligned pointer to source data */
777   mlib_u8 *sp;                         /* pointer to source data */
778   mlib_u32 s0, s1, s2;                 /* source data */
779   mlib_s16 *dl;                        /* pointer to start of destination */
780   mlib_s16 *dend;                      /* pointer to end of destination */
781   mlib_d64 *dp;                        /* aligned pointer to destination */
782   mlib_d64 t0, t1, t2;                 /* destination data */
783   mlib_d64 t3, t4, t5;                 /* destination data */
784   mlib_d64 t6, t7, t8;                 /* destination data */
785   mlib_d64 t9, t10, t11;               /* destination data */
786   mlib_d64 acc0, acc1, acc2;           /* destination data */
787   mlib_s32 emask;                      /* edge mask */
788   mlib_s32 i, num;                     /* loop variable */
789   const mlib_s16 *table;
790 
791   sa = (mlib_u32 *) src;
792   dl = dst;
793   dp = (mlib_d64 *) dl;
794   dend = dl + xsize - 1;
795 
796   vis_alignaddr((void *)0, 6);
797 
798   i = 0;
799 
800   if (xsize >= 12) {
801 
802     s0 = sa[0];
803     s1 = sa[1];
804     s2 = sa[2];
805     sa += 3;
806 
807 #pragma pipeloop(0)
808     for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
809       t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
810       t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
811       t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
812       t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
813       t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
814       t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
815       t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
816       t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
817       t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
818       t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
819       t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
820       t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
821       acc0 = vis_faligndata(t3, acc0);
822       acc0 = vis_faligndata(t2, acc0);
823       acc0 = vis_faligndata(t1, acc0);
824       acc0 = vis_faligndata(t0, acc0);
825       acc1 = vis_faligndata(t7, acc1);
826       acc1 = vis_faligndata(t6, acc1);
827       acc1 = vis_faligndata(t5, acc1);
828       acc1 = vis_faligndata(t4, acc1);
829       acc2 = vis_faligndata(t11, acc2);
830       acc2 = vis_faligndata(t10, acc2);
831       acc2 = vis_faligndata(t9, acc2);
832       acc2 = vis_faligndata(t8, acc2);
833       s0 = sa[0];
834       s1 = sa[1];
835       s2 = sa[2];
836       dp[0] = acc0;
837       dp[1] = acc1;
838       dp[2] = acc2;
839     }
840 
841     t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
842     t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
843     t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
844     t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
845     t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
846     t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
847     t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
848     t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
849     t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
850     t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
851     t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
852     t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
853     acc0 = vis_faligndata(t3, acc0);
854     acc0 = vis_faligndata(t2, acc0);
855     acc0 = vis_faligndata(t1, acc0);
856     acc0 = vis_faligndata(t0, acc0);
857     acc1 = vis_faligndata(t7, acc1);
858     acc1 = vis_faligndata(t6, acc1);
859     acc1 = vis_faligndata(t5, acc1);
860     acc1 = vis_faligndata(t4, acc1);
861     acc2 = vis_faligndata(t11, acc2);
862     acc2 = vis_faligndata(t10, acc2);
863     acc2 = vis_faligndata(t9, acc2);
864     acc2 = vis_faligndata(t8, acc2);
865     dp[0] = acc0;
866     dp[1] = acc1;
867     dp[2] = acc2;
868     dp += 3;
869     i += 12;
870   }
871 
872   if (i <= xsize - 8) {
873     s0 = sa[0];
874     s1 = sa[1];
875     t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
876     t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
877     t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
878     t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
879     t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
880     t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
881     t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
882     t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
883     acc0 = vis_faligndata(t3, acc0);
884     acc0 = vis_faligndata(t2, acc0);
885     acc0 = vis_faligndata(t1, acc0);
886     acc0 = vis_faligndata(t0, acc0);
887     acc1 = vis_faligndata(t7, acc1);
888     acc1 = vis_faligndata(t6, acc1);
889     acc1 = vis_faligndata(t5, acc1);
890     acc1 = vis_faligndata(t4, acc1);
891     dp[0] = acc0;
892     dp[1] = acc1;
893     table = table0;
894     table0 = table2;
895     table2 = table1;
896     table1 = table;
897     sa += 2;
898     i += 8;
899     dp += 2;
900   }
901 
902   if (i <= xsize - 4) {
903     s0 = sa[0];
904     t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
905     t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
906     t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
907     t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
908     acc0 = vis_faligndata(t3, acc0);
909     acc0 = vis_faligndata(t2, acc0);
910     acc0 = vis_faligndata(t1, acc0);
911     acc0 = vis_faligndata(t0, acc0);
912     dp[0] = acc0;
913     table = table0;
914     table0 = table1;
915     table1 = table2;
916     table2 = table;
917     sa++;
918     i += 4;
919     dp++;
920   }
921 
922   sp = (mlib_u8 *) sa;
923 
924   if ((mlib_addr) dp <= (mlib_addr) dend) {
925 
926     num = (mlib_s16 *) dend - (mlib_s16 *) dp;
927     sp += num;
928     num++;
929 
930     if (num == 1) {
931       s0 = (mlib_s32) * sp;
932       sp--;
933 
934       t0 = VIS_LD_U16_I(table0, 2 * s0);
935       acc0 = vis_faligndata(t0, acc0);
936     }
937     else if (num == 2) {
938       s0 = (mlib_s32) * sp;
939       sp--;
940 
941       t0 = VIS_LD_U16_I(table1, 2 * s0);
942       acc0 = vis_faligndata(t0, acc0);
943 
944       s0 = (mlib_s32) * sp;
945       sp--;
946 
947       t0 = VIS_LD_U16_I(table0, 2 * s0);
948       acc0 = vis_faligndata(t0, acc0);
949     }
950     else if (num == 3) {
951       s0 = (mlib_s32) * sp;
952       sp--;
953 
954       t0 = VIS_LD_U16_I(table2, 2 * s0);
955       acc0 = vis_faligndata(t0, acc0);
956 
957       s0 = (mlib_s32) * sp;
958       sp--;
959 
960       t0 = VIS_LD_U16_I(table1, 2 * s0);
961       acc0 = vis_faligndata(t0, acc0);
962 
963       s0 = (mlib_s32) * sp;
964       sp--;
965 
966       t0 = VIS_LD_U16_I(table0, 2 * s0);
967       acc0 = vis_faligndata(t0, acc0);
968     }
969 
970     emask = vis_edge16(dp, dend);
971     vis_pst_16(acc0, dp, emask);
972   }
973 }
974 
975 /***************************************************************/
/*
 * Row kernel of the 3-channel U8 -> S16 lookup for a source pointer that
 * lies one byte past a 32-bit boundary.
 *
 * The dispatcher mlib_v_ImageLookUp_U8_S16_3() in this file selects this
 * variant when ((mlib_addr) src & 3) == 1, so (src - 1) can be read as
 * whole 32-bit words.
 *
 *   src     first source sample of the run
 *   dst     first destination sample; it is written through an mlib_d64
 *           pointer, so it is expected to be 8-byte aligned (the dispatcher
 *           aligns it with a scalar prologue before calling)
 *   xsize   number of samples (not pixels) to convert
 *   table0  lookup table applied to samples 0, 3, 6, ... of the run
 *   table1  lookup table applied to samples 1, 4, 7, ...
 *   table2  lookup table applied to samples 2, 5, 8, ...
 *
 * A source byte is extracted from a word with a shift/mask pair that yields
 * "byte * 2", i.e. the byte offset of the matching mlib_s16 table entry:
 *   (s >> 23) & 0x1FE   most significant byte
 *   (s >> 15) & 0x1FE
 *   (s >>  7) & 0x1FE
 *   (s <<  1) & 0x1FE   least significant byte
 * The first sample of the run is the second most significant byte of the
 * first word (this assumes the big-endian byte order of the SPARC target).
 *
 * Because the source is fetched in aligned words, the byte in front of src
 * is read but never used.
 *
 * NOTE(review): acc0..acc2 are used without being initialised. With the
 * align offset of 6 set by vis_alignaddr(), each vis_faligndata(t, acc) is
 * understood to insert the 16-bit value held in t in front of acc and drop
 * the last lane, so four calls replace every lane; that is why values are
 * inserted in reverse order (t3 first, t0 last). In the tail fewer than
 * four lanes are filled and the partial store masks the others. Confirm
 * against the VIS documentation before changing this.
 */
void mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(const mlib_u8  *src,
                                            mlib_s16       *dst,
                                            mlib_s32       xsize,
                                            const mlib_s16 *table0,
                                            const mlib_s16 *table1,
                                            const mlib_s16 *table2)
{
  mlib_u32 *sa;                        /* aligned pointer to source data */
  mlib_u8 *sp;                         /* pointer to source data */
  mlib_u32 s0, s1, s2, s3;             /* source data */
  mlib_s16 *dl;                        /* pointer to start of destination */
  mlib_s16 *dend;                      /* pointer to end of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2;                 /* destination data */
  mlib_d64 t3, t4, t5;                 /* destination data */
  mlib_d64 t6, t7, t8;                 /* destination data */
  mlib_d64 t9, t10, t11;               /* destination data */
  mlib_d64 acc0, acc1, acc2;           /* destination data */
  mlib_s32 emask;                      /* edge mask */
  mlib_s32 i, num;                     /* loop variable */
  const mlib_s16 *table;

  /* step back to the word boundary in front of src */
  sa = (mlib_u32 *) (src - 1);
  dl = dst;
  dp = (mlib_d64 *) dl;
  dend = dl + xsize - 1;               /* last destination sample */

  vis_alignaddr((void *)0, 6);

  i = 0;

  /* s0 always carries the three not yet consumed low bytes of the */
  /* previous word                                                 */
  s0 = *sa++;

  if (xsize >= 12) {

    s1 = sa[0];
    s2 = sa[1];
    s3 = sa[2];
    sa += 3;

    /* 12 samples (three source words, three 8-byte stores) per pass;  */
    /* the words for the next pass are fetched before the current      */
    /* results are stored                                              */
#pragma pipeloop(0)
    for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
      t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
      t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
      t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
      t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
      t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
      t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
      t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
      t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
      t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
      t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
      t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
      t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
      acc0 = vis_faligndata(t3, acc0);
      acc0 = vis_faligndata(t2, acc0);
      acc0 = vis_faligndata(t1, acc0);
      acc0 = vis_faligndata(t0, acc0);
      acc1 = vis_faligndata(t7, acc1);
      acc1 = vis_faligndata(t6, acc1);
      acc1 = vis_faligndata(t5, acc1);
      acc1 = vis_faligndata(t4, acc1);
      acc2 = vis_faligndata(t11, acc2);
      acc2 = vis_faligndata(t10, acc2);
      acc2 = vis_faligndata(t9, acc2);
      acc2 = vis_faligndata(t8, acc2);
      s0 = s3;
      s1 = sa[0];
      s2 = sa[1];
      s3 = sa[2];
      dp[0] = acc0;
      dp[1] = acc1;
      dp[2] = acc2;
    }

    /* last group of 12: its source words are already in s0..s3 */
    t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
    t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
    t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
    t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
    t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
    t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
    t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
    t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
    t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    acc1 = vis_faligndata(t7, acc1);
    acc1 = vis_faligndata(t6, acc1);
    acc1 = vis_faligndata(t5, acc1);
    acc1 = vis_faligndata(t4, acc1);
    acc2 = vis_faligndata(t11, acc2);
    acc2 = vis_faligndata(t10, acc2);
    acc2 = vis_faligndata(t9, acc2);
    acc2 = vis_faligndata(t8, acc2);
    dp[0] = acc0;
    dp[1] = acc1;
    dp[2] = acc2;
    s0 = s3;
    dp += 3;
    i += 12;
  }

  /* eight more samples (two 8-byte stores) */
  if (i <= xsize - 8) {
    s1 = sa[0];
    s2 = sa[1];
    t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
    t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
    t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
    t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
    t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    acc1 = vis_faligndata(t7, acc1);
    acc1 = vis_faligndata(t6, acc1);
    acc1 = vis_faligndata(t5, acc1);
    acc1 = vis_faligndata(t4, acc1);
    dp[0] = acc0;
    dp[1] = acc1;
    /* 8 mod 3 == 2: rotate so that table0 is the table of the next sample */
    table = table0;
    table0 = table2;
    table2 = table1;
    table1 = table;
    sa += 2;
    i += 8;
    dp += 2;
    s0 = s2;
  }

  /* four more samples (one 8-byte store) */
  if (i <= xsize - 4) {
    s1 = sa[0];
    t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    dp[0] = acc0;
    /* 4 mod 3 == 1: rotate so that table0 is the table of the next sample */
    table = table0;
    table0 = table1;
    table1 = table2;
    table2 = table;
    sa++;
    i += 4;
    dp++;
    s0 = s1;
  }

  /* sa is one word past the word cached in s0; the first sample that has */
  /* not been converted yet is that word's second byte, 3 bytes before sa */
  sp = (mlib_u8 *) sa;
  sp -= 3;

  /* tail of 1..3 samples, written with a masked (partial) store */
  if ((mlib_addr) dp <= (mlib_addr) dend) {

    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
    sp += num;                         /* last remaining source sample */
    num++;                             /* number of remaining samples */

    /* samples are inserted last to first, each with its own table */
    if (num == 1) {
      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table0, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);
    }
    else if (num == 2) {
      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table1, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table0, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);
    }
    else if (num == 3) {
      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table2, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table1, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table0, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);
    }

    /* store only the lanes that lie inside [dp, dend] */
    emask = vis_edge16(dp, dend);
    vis_pst_16(acc0, dp, emask);
  }
}
1188 
1189 /***************************************************************/
/*
 * Row kernel of the 3-channel U8 -> S16 lookup for a source pointer that
 * lies two bytes past a 32-bit boundary.
 *
 * The dispatcher mlib_v_ImageLookUp_U8_S16_3() in this file selects this
 * variant when ((mlib_addr) src & 3) == 2, so (src - 2) can be read as
 * whole 32-bit words.
 *
 *   src     first source sample of the run
 *   dst     first destination sample; it is written through an mlib_d64
 *           pointer, so it is expected to be 8-byte aligned (the dispatcher
 *           aligns it with a scalar prologue before calling)
 *   xsize   number of samples (not pixels) to convert
 *   table0  lookup table applied to samples 0, 3, 6, ... of the run
 *   table1  lookup table applied to samples 1, 4, 7, ...
 *   table2  lookup table applied to samples 2, 5, 8, ...
 *
 * A source byte is extracted from a word with a shift/mask pair that yields
 * "byte * 2", i.e. the byte offset of the matching mlib_s16 table entry:
 *   (s >> 23) & 0x1FE   most significant byte
 *   (s >> 15) & 0x1FE
 *   (s >>  7) & 0x1FE
 *   (s <<  1) & 0x1FE   least significant byte
 * The first sample of the run is the third most significant byte of the
 * first word (this assumes the big-endian byte order of the SPARC target).
 *
 * Because the source is fetched in aligned words, the two bytes in front of
 * src are read but never used.
 *
 * NOTE(review): acc0..acc2 are used without being initialised. With the
 * align offset of 6 set by vis_alignaddr(), each vis_faligndata(t, acc) is
 * understood to insert the 16-bit value held in t in front of acc and drop
 * the last lane, so four calls replace every lane; that is why values are
 * inserted in reverse order (t3 first, t0 last). In the tail fewer than
 * four lanes are filled and the partial store masks the others. Confirm
 * against the VIS documentation before changing this.
 */
void mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(const mlib_u8  *src,
                                            mlib_s16       *dst,
                                            mlib_s32       xsize,
                                            const mlib_s16 *table0,
                                            const mlib_s16 *table1,
                                            const mlib_s16 *table2)
{
  mlib_u32 *sa;                        /* aligned pointer to source data */
  mlib_u8 *sp;                         /* pointer to source data */
  mlib_u32 s0, s1, s2, s3;             /* source data */
  mlib_s16 *dl;                        /* pointer to start of destination */
  mlib_s16 *dend;                      /* pointer to end of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2;                 /* destination data */
  mlib_d64 t3, t4, t5;                 /* destination data */
  mlib_d64 t6, t7, t8;                 /* destination data */
  mlib_d64 t9, t10, t11;               /* destination data */
  mlib_d64 acc0, acc1, acc2;           /* destination data */
  mlib_s32 emask;                      /* edge mask */
  mlib_s32 i, num;                     /* loop variable */
  const mlib_s16 *table;

  /* step back to the word boundary in front of src */
  sa = (mlib_u32 *) (src - 2);
  dl = dst;
  dp = (mlib_d64 *) dl;
  dend = dl + xsize - 1;               /* last destination sample */

  vis_alignaddr((void *)0, 6);

  i = 0;

  /* s0 always carries the two not yet consumed low bytes of the */
  /* previous word                                               */
  s0 = *sa++;

  if (xsize >= 12) {

    s1 = sa[0];
    s2 = sa[1];
    s3 = sa[2];
    sa += 3;

    /* 12 samples (three source words, three 8-byte stores) per pass;  */
    /* the words for the next pass are fetched before the current      */
    /* results are stored                                              */
#pragma pipeloop(0)
    for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
      t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
      t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
      t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
      t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
      t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
      t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
      t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
      t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
      t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
      t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
      t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
      t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
      acc0 = vis_faligndata(t3, acc0);
      acc0 = vis_faligndata(t2, acc0);
      acc0 = vis_faligndata(t1, acc0);
      acc0 = vis_faligndata(t0, acc0);
      acc1 = vis_faligndata(t7, acc1);
      acc1 = vis_faligndata(t6, acc1);
      acc1 = vis_faligndata(t5, acc1);
      acc1 = vis_faligndata(t4, acc1);
      acc2 = vis_faligndata(t11, acc2);
      acc2 = vis_faligndata(t10, acc2);
      acc2 = vis_faligndata(t9, acc2);
      acc2 = vis_faligndata(t8, acc2);
      s0 = s3;
      s1 = sa[0];
      s2 = sa[1];
      s3 = sa[2];
      dp[0] = acc0;
      dp[1] = acc1;
      dp[2] = acc2;
    }

    /* last group of 12: its source words are already in s0..s3 */
    t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
    t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
    t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
    t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
    t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
    t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
    t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
    t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
    t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    acc1 = vis_faligndata(t7, acc1);
    acc1 = vis_faligndata(t6, acc1);
    acc1 = vis_faligndata(t5, acc1);
    acc1 = vis_faligndata(t4, acc1);
    acc2 = vis_faligndata(t11, acc2);
    acc2 = vis_faligndata(t10, acc2);
    acc2 = vis_faligndata(t9, acc2);
    acc2 = vis_faligndata(t8, acc2);
    dp[0] = acc0;
    dp[1] = acc1;
    dp[2] = acc2;
    s0 = s3;
    dp += 3;
    i += 12;
  }

  /* eight more samples (two 8-byte stores) */
  if (i <= xsize - 8) {
    s1 = sa[0];
    s2 = sa[1];
    t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
    t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
    t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
    t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
    t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    acc1 = vis_faligndata(t7, acc1);
    acc1 = vis_faligndata(t6, acc1);
    acc1 = vis_faligndata(t5, acc1);
    acc1 = vis_faligndata(t4, acc1);
    dp[0] = acc0;
    dp[1] = acc1;
    /* 8 mod 3 == 2: rotate so that table0 is the table of the next sample */
    table = table0;
    table0 = table2;
    table2 = table1;
    table1 = table;
    sa += 2;
    i += 8;
    dp += 2;
    s0 = s2;
  }

  /* four more samples (one 8-byte store) */
  if (i <= xsize - 4) {
    s1 = sa[0];
    t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    dp[0] = acc0;
    /* 4 mod 3 == 1: rotate so that table0 is the table of the next sample */
    table = table0;
    table0 = table1;
    table1 = table2;
    table2 = table;
    sa++;
    i += 4;
    dp++;
    s0 = s1;
  }

  /* sa is one word past the word cached in s0; the first sample that has */
  /* not been converted yet is that word's third byte, 2 bytes before sa  */
  sp = (mlib_u8 *) sa;
  sp -= 2;

  /* tail of 1..3 samples, written with a masked (partial) store */
  if ((mlib_addr) dp <= (mlib_addr) dend) {

    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
    sp += num;                         /* last remaining source sample */
    num++;                             /* number of remaining samples */

    /* samples are inserted last to first, each with its own table */
    if (num == 1) {
      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table0, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);
    }
    else if (num == 2) {
      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table1, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table0, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);
    }
    else if (num == 3) {
      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table2, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table1, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table0, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);
    }

    /* store only the lanes that lie inside [dp, dend] */
    emask = vis_edge16(dp, dend);
    vis_pst_16(acc0, dp, emask);
  }
}
1402 
1403 /***************************************************************/
/*
 * Row kernel of the 3-channel U8 -> S16 lookup for a source pointer that
 * lies three bytes past a 32-bit boundary.
 *
 * The dispatcher mlib_v_ImageLookUp_U8_S16_3() in this file selects this
 * variant when ((mlib_addr) src & 3) == 3, so (src - 3) can be read as
 * whole 32-bit words.
 *
 *   src     first source sample of the run
 *   dst     first destination sample; it is written through an mlib_d64
 *           pointer, so it is expected to be 8-byte aligned (the dispatcher
 *           aligns it with a scalar prologue before calling)
 *   xsize   number of samples (not pixels) to convert
 *   table0  lookup table applied to samples 0, 3, 6, ... of the run
 *   table1  lookup table applied to samples 1, 4, 7, ...
 *   table2  lookup table applied to samples 2, 5, 8, ...
 *
 * A source byte is extracted from a word with a shift/mask pair that yields
 * "byte * 2", i.e. the byte offset of the matching mlib_s16 table entry:
 *   (s >> 23) & 0x1FE   most significant byte
 *   (s >> 15) & 0x1FE
 *   (s >>  7) & 0x1FE
 *   (s <<  1) & 0x1FE   least significant byte
 * The first sample of the run is the least significant byte of the first
 * word (this assumes the big-endian byte order of the SPARC target).
 *
 * Because the source is fetched in aligned words, the three bytes in front
 * of src are read but never used.
 *
 * NOTE(review): acc0..acc2 are used without being initialised. With the
 * align offset of 6 set by vis_alignaddr(), each vis_faligndata(t, acc) is
 * understood to insert the 16-bit value held in t in front of acc and drop
 * the last lane, so four calls replace every lane; that is why values are
 * inserted in reverse order (t3 first, t0 last). In the tail fewer than
 * four lanes are filled and the partial store masks the others. Confirm
 * against the VIS documentation before changing this.
 */
void mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(const mlib_u8  *src,
                                            mlib_s16       *dst,
                                            mlib_s32       xsize,
                                            const mlib_s16 *table0,
                                            const mlib_s16 *table1,
                                            const mlib_s16 *table2)
{
  mlib_u32 *sa;                        /* aligned pointer to source data */
  mlib_u8 *sp;                         /* pointer to source data */
  mlib_u32 s0, s1, s2, s3;             /* source data */
  mlib_s16 *dl;                        /* pointer to start of destination */
  mlib_s16 *dend;                      /* pointer to end of destination */
  mlib_d64 *dp;                        /* aligned pointer to destination */
  mlib_d64 t0, t1, t2;                 /* destination data */
  mlib_d64 t3, t4, t5;                 /* destination data */
  mlib_d64 t6, t7, t8;                 /* destination data */
  mlib_d64 t9, t10, t11;               /* destination data */
  mlib_d64 acc0, acc1, acc2;           /* destination data */
  mlib_s32 emask;                      /* edge mask */
  mlib_s32 i, num;                     /* loop variable */
  const mlib_s16 *table;

  /* step back to the word boundary in front of src */
  sa = (mlib_u32 *) (src - 3);
  dl = dst;
  dp = (mlib_d64 *) dl;
  dend = dl + xsize - 1;               /* last destination sample */

  vis_alignaddr((void *)0, 6);

  i = 0;

  /* s0 always carries the one not yet consumed low byte of the */
  /* previous word                                              */
  s0 = *sa++;

  if (xsize >= 12) {

    s1 = sa[0];
    s2 = sa[1];
    s3 = sa[2];
    sa += 3;

    /* 12 samples (three source words, three 8-byte stores) per pass;  */
    /* the words for the next pass are fetched before the current      */
    /* results are stored                                              */
#pragma pipeloop(0)
    for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
      t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
      t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
      t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
      t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
      t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
      t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
      t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
      t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
      t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
      t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
      t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
      t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
      acc0 = vis_faligndata(t3, acc0);
      acc0 = vis_faligndata(t2, acc0);
      acc0 = vis_faligndata(t1, acc0);
      acc0 = vis_faligndata(t0, acc0);
      acc1 = vis_faligndata(t7, acc1);
      acc1 = vis_faligndata(t6, acc1);
      acc1 = vis_faligndata(t5, acc1);
      acc1 = vis_faligndata(t4, acc1);
      acc2 = vis_faligndata(t11, acc2);
      acc2 = vis_faligndata(t10, acc2);
      acc2 = vis_faligndata(t9, acc2);
      acc2 = vis_faligndata(t8, acc2);
      s0 = s3;
      s1 = sa[0];
      s2 = sa[1];
      s3 = sa[2];
      dp[0] = acc0;
      dp[1] = acc1;
      dp[2] = acc2;
    }

    /* last group of 12: its source words are already in s0..s3 */
    t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
    t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
    t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
    t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
    t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
    t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
    t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
    t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
    t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    acc1 = vis_faligndata(t7, acc1);
    acc1 = vis_faligndata(t6, acc1);
    acc1 = vis_faligndata(t5, acc1);
    acc1 = vis_faligndata(t4, acc1);
    acc2 = vis_faligndata(t11, acc2);
    acc2 = vis_faligndata(t10, acc2);
    acc2 = vis_faligndata(t9, acc2);
    acc2 = vis_faligndata(t8, acc2);
    dp[0] = acc0;
    dp[1] = acc1;
    dp[2] = acc2;
    s0 = s3;
    dp += 3;
    i += 12;
  }

  /* eight more samples (two 8-byte stores) */
  if (i <= xsize - 8) {
    s1 = sa[0];
    s2 = sa[1];
    t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
    t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
    t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
    t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
    t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    acc1 = vis_faligndata(t7, acc1);
    acc1 = vis_faligndata(t6, acc1);
    acc1 = vis_faligndata(t5, acc1);
    acc1 = vis_faligndata(t4, acc1);
    dp[0] = acc0;
    dp[1] = acc1;
    /* 8 mod 3 == 2: rotate so that table0 is the table of the next sample */
    table = table0;
    table0 = table2;
    table2 = table1;
    table1 = table;
    sa += 2;
    i += 8;
    dp += 2;
    s0 = s2;
  }

  /* four more samples (one 8-byte store) */
  if (i <= xsize - 4) {
    s1 = sa[0];
    t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
    t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
    t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
    t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
    acc0 = vis_faligndata(t3, acc0);
    acc0 = vis_faligndata(t2, acc0);
    acc0 = vis_faligndata(t1, acc0);
    acc0 = vis_faligndata(t0, acc0);
    dp[0] = acc0;
    /* 4 mod 3 == 1: rotate so that table0 is the table of the next sample */
    table = table0;
    table0 = table1;
    table1 = table2;
    table2 = table;
    sa++;
    i += 4;
    dp++;
    s0 = s1;
  }

  /* sa is one word past the word cached in s0; the first sample that has */
  /* not been converted yet is that word's last byte, 1 byte before sa    */
  sp = (mlib_u8 *) sa;
  sp -= 1;

  /* tail of 1..3 samples, written with a masked (partial) store */
  if ((mlib_addr) dp <= (mlib_addr) dend) {

    num = (mlib_s16 *) dend - (mlib_s16 *) dp;
    sp += num;                         /* last remaining source sample */
    num++;                             /* number of remaining samples */

    /* samples are inserted last to first, each with its own table */
    if (num == 1) {
      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table0, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);
    }
    else if (num == 2) {
      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table1, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table0, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);
    }
    else if (num == 3) {
      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table2, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table1, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);

      s0 = (mlib_s32) * sp;
      sp--;

      t0 = VIS_LD_U16_I(table0, 2 * s0);
      acc0 = vis_faligndata(t0, acc0);
    }

    /* store only the lanes that lie inside [dp, dend] */
    emask = vis_edge16(dp, dend);
    vis_pst_16(acc0, dp, emask);
  }
}
1616 
1617 /***************************************************************/
/*
 * Three-channel U8 -> S16 table lookup over a whole image.
 *
 *   src    first source sample of the first row
 *   slb    distance in bytes between consecutive source rows
 *   dst    first destination sample of the first row
 *   dlb    distance in bytes between consecutive destination rows
 *   xsize  row width in pixels (each row holds xsize * 3 samples)
 *   ysize  number of rows
 *   table  table[0], table[1], table[2] are the lookup tables of channels
 *          0, 1 and 2; each is indexed with the source byte value
 *
 * Every row is handled in two steps:
 *   1. up to three samples are converted one at a time until the
 *      destination pointer reaches an 8-byte boundary;
 *   2. the remaining samples go to the row kernel that matches the low two
 *      bits of the source address.
 * The row kernels always apply their first table to their first sample, so
 * after step 1 the three table pointers are rotated to keep every sample
 * paired with the table of its channel.
 */
void mlib_v_ImageLookUp_U8_S16_3(const mlib_u8  *src,
                                 mlib_s32       slb,
                                 mlib_s16       *dst,
                                 mlib_s32       dlb,
                                 mlib_s32       xsize,
                                 mlib_s32       ysize,
                                 const mlib_s16 **table)
{
  const mlib_u8 *src_row = src;        /* start of current source row */
  mlib_s16 *dst_row = dst;             /* start of current destination row */
  mlib_s32 row;

  for (row = 0; row < ysize; row++) {
    const mlib_u8 *sp = src_row;
    mlib_s16 *dp = dst_row;
    const mlib_s16 *tab0 = table[0];
    const mlib_s16 *tab1 = table[1];
    const mlib_s16 *tab2 = table[2];
    const mlib_s16 *spare;             /* scratch for rotating the tables */
    mlib_s32 size = xsize * 3;         /* samples left in this row */
    mlib_s32 lead;                     /* samples in front of the boundary */

    /* number of 16-bit samples up to the next 8-byte boundary (0..3), */
    /* limited to the length of the row                                */
    lead = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
    if (lead > size) {
      lead = size;
    }

    switch (lead) {
    case 3:
      /* one whole pixel: the channel order is unchanged */
      dp[0] = tab0[sp[0]];
      dp[1] = tab1[sp[1]];
      dp[2] = tab2[sp[2]];
      dp += 3;
      sp += 3;
      size -= 3;
      break;

    case 2:
      dp[0] = tab0[sp[0]];
      dp[1] = tab1[sp[1]];
      dp += 2;
      sp += 2;
      size -= 2;
      /* the next sample belongs to channel 2 */
      spare = tab2;
      tab2 = tab1;
      tab1 = tab0;
      tab0 = spare;
      break;

    case 1:
      dp[0] = tab0[sp[0]];
      dp += 1;
      sp += 1;
      size -= 1;
      /* the next sample belongs to channel 1 */
      spare = tab0;
      tab0 = tab1;
      tab1 = tab2;
      tab2 = spare;
      break;

    default:
      /* already aligned, or the row is empty */
      break;
    }

    if (size > 0) {
      /* choose the kernel by the source offset inside a 32-bit word */
      switch ((mlib_s32) ((mlib_addr) sp & 3)) {
      case 0:
        mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2);
        break;
      case 1:
        mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2);
        break;
      case 2:
        mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2);
        break;
      default:
        mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2);
        break;
      }
    }

    /* both strides are byte counts */
    src_row = src_row + slb;
    dst_row = (mlib_s16 *) ((mlib_u8 *) dst_row + dlb);
  }
}
1696 
1697 /***************************************************************/
1698