1 /*
2  * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 
27 
28 #include "vis_proto.h"
29 #include "mlib_image.h"
30 #include "mlib_v_ImageLookUpFunc.h"
31 
32 /***************************************************************/
33 static void mlib_v_ImageLookUpSI_S16_S32_2_D1(const mlib_s16 *src,
34                                               mlib_f32       *dst,
35                                               mlib_s32       xsize,
36                                               const mlib_s32 **table);
37 
38 static void mlib_v_ImageLookUpSI_S16_S32_3_D1(const mlib_s16 *src,
39                                               mlib_f32       *dst,
40                                               mlib_s32       xsize,
41                                               const mlib_s32 **table);
42 
43 static void mlib_v_ImageLookUpSI_S16_S32_4_D1(const mlib_s16 *src,
44                                               mlib_f32       *dst,
45                                               mlib_s32       xsize,
46                                               const mlib_s32 **table);
47 
48 /***************************************************************/
/*
 * Row kernel, 2 output channels.
 *
 * For every S16 sample s of the row, writes table[0][s + 32768] and
 * table[1][s + 32768] to consecutive destination words.
 *
 *   src    source row; read two samples at a time through an mlib_s32
 *          pointer, so it is expected to be 4-byte aligned
 *   dst    destination row; receives 2 * xsize words, moved as mlib_f32
 *          (the values are only loaded and stored, never computed on)
 *   xsize  number of source samples to convert
 *   table  per-channel lookup tables, indexed with a bias of 32768
 *
 * The upper half-word of each 32-bit source word is treated as the earlier
 * sample, which matches a big-endian memory layout.
 */
void mlib_v_ImageLookUpSI_S16_S32_2_D1(const mlib_s16 *src,
                                       mlib_f32       *dst,
                                       mlib_s32       xsize,
                                       const mlib_s32 **table)
{
  /* table bases moved to entry 32768 - presumably so negative samples index in range */
  const mlib_f32 *lut0 = (const mlib_f32 *) (&table[0][32768]);
  const mlib_f32 *lut1 = (const mlib_f32 *) (&table[1][32768]);
  const mlib_s32 *pairs = (const mlib_s32 *) src;  /* two samples per word */
  const mlib_s16 *last;                            /* odd trailing sample */
  mlib_f32 *out = dst;
  mlib_s32 left = xsize;                           /* samples still to convert */

  if (left >= 2) {
    mlib_s32 word = *pairs++;   /* current pair, fetched one step ahead */
    mlib_s32 off_a, off_b;      /* byte offsets (sample * 4) into the tables */
    mlib_f32 a0, a1;            /* results for the first sample of the pair */
    mlib_f32 b0, b1;            /* results for the second sample of the pair */

#pragma pipeloop(0)
    for (; left >= 4; left -= 2, out += 4) {
      off_a = (word >> 14) & (~3);
      off_b = ((word << 16) >> 14);
      a0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_a);
      a1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_a);
      b0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_b);
      b1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_b);
      word = *pairs++;          /* read the next pair before storing */
      out[0] = a0;
      out[1] = a1;
      out[2] = b0;
      out[3] = b1;
    }

    /* The word fetched last has no successor to read ahead, so it is
       converted here instead of inside the loop. */
    off_a = (word >> 14) & (~3);
    off_b = ((word << 16) >> 14);
    a0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_a);
    a1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_a);
    b0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_b);
    b1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_b);
    out[0] = a0;
    out[1] = a1;
    out[2] = b0;
    out[3] = b1;
    out += 4;
    left -= 2;
  }

  last = (const mlib_s16 *) pairs;

  /* odd width: one sample remains after all full pairs */
  if (left > 0) {
    out[0] = lut0[last[0]];
    out[1] = lut1[last[0]];
  }
}
110 
111 /***************************************************************/
/*
 * Image driver, 2 output channels: applies the S16 -> S32 lookup to every
 * row of the image.
 *
 *   src    first source row
 *   slb    distance between source rows, in bytes
 *   dst    first destination row (2 words per source sample)
 *   dlb    distance between destination rows, in bytes
 *   xsize  samples per row; nothing is read or written when xsize <= 0
 *   ysize  number of rows
 *   table  per-channel lookup tables, indexed with a bias of 32768
 */
void mlib_v_ImageLookUpSI_S16_S32_2(const mlib_s16 *src,
                                    mlib_s32       slb,
                                    mlib_s32       *dst,
                                    mlib_s32       dlb,
                                    mlib_s32       xsize,
                                    mlib_s32       ysize,
                                    const mlib_s32 **table)
{
  const mlib_s32 *tab0 = &table[0][32768];
  const mlib_s32 *tab1 = &table[1][32768];
  const mlib_s16 *sl = src;
  mlib_s32 *dl = dst;
  mlib_s32 j;

  /* Without this guard the alignment step below would still convert one
     sample of a misaligned row even though the row is empty. */
  if (xsize <= 0) {
    return;
  }

  /* row loop */
  for (j = 0; j < ysize; j++) {
    const mlib_s16 *sp = sl;
    mlib_s32 *dp = dl;
    mlib_s32 size = xsize;

    /* The row kernel reads the source as 32-bit words: convert one sample
       here when the row does not start on a 4-byte boundary. */
    if (((mlib_addr) sp & 3) != 0) {
      mlib_s32 s0 = *sp++;

      *dp++ = tab0[s0];
      *dp++ = tab1[s0];
      size--;
    }

    if (size > 0) {
      mlib_v_ImageLookUpSI_S16_S32_2_D1(sp, (mlib_f32 *) dp, size, table);
    }

    sl = (const mlib_s16 *) ((const mlib_u8 *) sl + slb);
    dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
  }
}
150 
151 /***************************************************************/
/*
 * Row kernel, 3 output channels.
 *
 * For every S16 sample s of the row, writes table[0][s + 32768],
 * table[1][s + 32768] and table[2][s + 32768] to consecutive destination
 * words.
 *
 *   src    source row; read two samples at a time through an mlib_s32
 *          pointer, so it is expected to be 4-byte aligned
 *   dst    destination row; receives 3 * xsize words, moved as mlib_f32
 *          (the values are only loaded and stored, never computed on)
 *   xsize  number of source samples to convert
 *   table  per-channel lookup tables, indexed with a bias of 32768
 *
 * The upper half-word of each 32-bit source word is treated as the earlier
 * sample, which matches a big-endian memory layout.
 */
void mlib_v_ImageLookUpSI_S16_S32_3_D1(const mlib_s16 *src,
                                       mlib_f32       *dst,
                                       mlib_s32       xsize,
                                       const mlib_s32 **table)
{
  /* table bases moved to entry 32768 - presumably so negative samples index in range */
  const mlib_f32 *lut0 = (const mlib_f32 *) (&table[0][32768]);
  const mlib_f32 *lut1 = (const mlib_f32 *) (&table[1][32768]);
  const mlib_f32 *lut2 = (const mlib_f32 *) (&table[2][32768]);
  const mlib_s32 *pairs = (const mlib_s32 *) src;  /* two samples per word */
  const mlib_s16 *last;                            /* odd trailing sample */
  mlib_f32 *out = dst;
  mlib_s32 left = xsize;                           /* samples still to convert */

  if (left >= 2) {
    mlib_s32 word = *pairs++;   /* current pair, fetched one step ahead */
    mlib_s32 off_a, off_b;      /* byte offsets (sample * 4) into the tables */
    mlib_f32 a0, a1, a2;        /* results for the first sample of the pair */
    mlib_f32 b0, b1, b2;        /* results for the second sample of the pair */

#pragma pipeloop(0)
    for (; left >= 4; left -= 2, out += 6) {
      off_a = (word >> 14) & (~3);
      off_b = ((word << 16) >> 14);
      a0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_a);
      a1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_a);
      a2 = *(const mlib_f32 *) ((const mlib_u8 *) lut2 + off_a);
      b0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_b);
      b1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_b);
      b2 = *(const mlib_f32 *) ((const mlib_u8 *) lut2 + off_b);
      word = *pairs++;          /* read the next pair before storing */
      out[0] = a0;
      out[1] = a1;
      out[2] = a2;
      out[3] = b0;
      out[4] = b1;
      out[5] = b2;
    }

    /* The word fetched last has no successor to read ahead, so it is
       converted here instead of inside the loop. */
    off_a = (word >> 14) & (~3);
    off_b = ((word << 16) >> 14);
    a0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_a);
    a1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_a);
    a2 = *(const mlib_f32 *) ((const mlib_u8 *) lut2 + off_a);
    b0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_b);
    b1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_b);
    b2 = *(const mlib_f32 *) ((const mlib_u8 *) lut2 + off_b);
    out[0] = a0;
    out[1] = a1;
    out[2] = a2;
    out[3] = b0;
    out[4] = b1;
    out[5] = b2;
    out += 6;
    left -= 2;
  }

  last = (const mlib_s16 *) pairs;

  /* odd width: one sample remains after all full pairs */
  if (left > 0) {
    out[0] = lut0[last[0]];
    out[1] = lut1[last[0]];
    out[2] = lut2[last[0]];
  }
}
224 
225 /***************************************************************/
/*
 * Image driver, 3 output channels: applies the S16 -> S32 lookup to every
 * row of the image.
 *
 *   src    first source row
 *   slb    distance between source rows, in bytes
 *   dst    first destination row (3 words per source sample)
 *   dlb    distance between destination rows, in bytes
 *   xsize  samples per row; nothing is read or written when xsize <= 0
 *   ysize  number of rows
 *   table  per-channel lookup tables, indexed with a bias of 32768
 */
void mlib_v_ImageLookUpSI_S16_S32_3(const mlib_s16 *src,
                                    mlib_s32       slb,
                                    mlib_s32       *dst,
                                    mlib_s32       dlb,
                                    mlib_s32       xsize,
                                    mlib_s32       ysize,
                                    const mlib_s32 **table)
{
  const mlib_s32 *tab0 = &table[0][32768];
  const mlib_s32 *tab1 = &table[1][32768];
  const mlib_s32 *tab2 = &table[2][32768];
  const mlib_s16 *sl = src;
  mlib_s32 *dl = dst;
  mlib_s32 j;

  /* Without this guard the alignment step below would still convert one
     sample of a misaligned row even though the row is empty. */
  if (xsize <= 0) {
    return;
  }

  /* row loop */
  for (j = 0; j < ysize; j++) {
    const mlib_s16 *sp = sl;
    mlib_s32 *dp = dl;
    mlib_s32 size = xsize;

    /* The row kernel reads the source as 32-bit words: convert one sample
       here when the row does not start on a 4-byte boundary. */
    if (((mlib_addr) sp & 3) != 0) {
      mlib_s32 s0 = *sp++;

      *dp++ = tab0[s0];
      *dp++ = tab1[s0];
      *dp++ = tab2[s0];
      size--;
    }

    if (size > 0) {
      mlib_v_ImageLookUpSI_S16_S32_3_D1(sp, (mlib_f32 *) dp, size, table);
    }

    sl = (const mlib_s16 *) ((const mlib_u8 *) sl + slb);
    dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
  }
}
266 
267 /***************************************************************/
/*
 * Row kernel, 4 output channels.
 *
 * For every S16 sample s of the row, writes table[0][s + 32768] through
 * table[3][s + 32768] to consecutive destination words.
 *
 *   src    source row; read two samples at a time through an mlib_s32
 *          pointer, so it is expected to be 4-byte aligned
 *   dst    destination row; receives 4 * xsize words, moved as mlib_f32
 *          (the values are only loaded and stored, never computed on)
 *   xsize  number of source samples to convert
 *   table  per-channel lookup tables, indexed with a bias of 32768
 *
 * The upper half-word of each 32-bit source word is treated as the earlier
 * sample, which matches a big-endian memory layout.
 */
void mlib_v_ImageLookUpSI_S16_S32_4_D1(const mlib_s16 *src,
                                       mlib_f32       *dst,
                                       mlib_s32       xsize,
                                       const mlib_s32 **table)
{
  /* table bases moved to entry 32768 - presumably so negative samples index in range */
  const mlib_f32 *lut0 = (const mlib_f32 *) (&table[0][32768]);
  const mlib_f32 *lut1 = (const mlib_f32 *) (&table[1][32768]);
  const mlib_f32 *lut2 = (const mlib_f32 *) (&table[2][32768]);
  const mlib_f32 *lut3 = (const mlib_f32 *) (&table[3][32768]);
  const mlib_s32 *pairs = (const mlib_s32 *) src;  /* two samples per word */
  const mlib_s16 *last;                            /* odd trailing sample */
  mlib_f32 *out = dst;
  mlib_s32 left = xsize;                           /* samples still to convert */

  if (left >= 2) {
    mlib_s32 word = *pairs++;   /* current pair, fetched one step ahead */
    mlib_s32 off_a, off_b;      /* byte offsets (sample * 4) into the tables */
    mlib_f32 a0, a1, a2, a3;    /* results for the first sample of the pair */
    mlib_f32 b0, b1, b2, b3;    /* results for the second sample of the pair */

#pragma pipeloop(0)
    for (; left >= 4; left -= 2, out += 8) {
      off_a = (word >> 14) & (~3);
      off_b = ((word << 16) >> 14);
      a0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_a);
      a1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_a);
      a2 = *(const mlib_f32 *) ((const mlib_u8 *) lut2 + off_a);
      a3 = *(const mlib_f32 *) ((const mlib_u8 *) lut3 + off_a);
      b0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_b);
      b1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_b);
      b2 = *(const mlib_f32 *) ((const mlib_u8 *) lut2 + off_b);
      b3 = *(const mlib_f32 *) ((const mlib_u8 *) lut3 + off_b);
      word = *pairs++;          /* read the next pair before storing */
      out[0] = a0;
      out[1] = a1;
      out[2] = a2;
      out[3] = a3;
      out[4] = b0;
      out[5] = b1;
      out[6] = b2;
      out[7] = b3;
    }

    /* The word fetched last has no successor to read ahead, so it is
       converted here instead of inside the loop. */
    off_a = (word >> 14) & (~3);
    off_b = ((word << 16) >> 14);
    a0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_a);
    a1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_a);
    a2 = *(const mlib_f32 *) ((const mlib_u8 *) lut2 + off_a);
    a3 = *(const mlib_f32 *) ((const mlib_u8 *) lut3 + off_a);
    b0 = *(const mlib_f32 *) ((const mlib_u8 *) lut0 + off_b);
    b1 = *(const mlib_f32 *) ((const mlib_u8 *) lut1 + off_b);
    b2 = *(const mlib_f32 *) ((const mlib_u8 *) lut2 + off_b);
    b3 = *(const mlib_f32 *) ((const mlib_u8 *) lut3 + off_b);
    out[0] = a0;
    out[1] = a1;
    out[2] = a2;
    out[3] = a3;
    out[4] = b0;
    out[5] = b1;
    out[6] = b2;
    out[7] = b3;
    out += 8;
    left -= 2;
  }

  last = (const mlib_s16 *) pairs;

  /* odd width: one sample remains after all full pairs */
  if (left > 0) {
    out[0] = lut0[last[0]];
    out[1] = lut1[last[0]];
    out[2] = lut2[last[0]];
    out[3] = lut3[last[0]];
  }
}
351 
352 /***************************************************************/
/*
 * Image driver, 4 output channels: applies the S16 -> S32 lookup to every
 * row of the image.
 *
 *   src    first source row
 *   slb    distance between source rows, in bytes
 *   dst    first destination row (4 words per source sample)
 *   dlb    distance between destination rows, in bytes
 *   xsize  samples per row; nothing is read or written when xsize <= 0
 *   ysize  number of rows
 *   table  per-channel lookup tables, indexed with a bias of 32768
 */
void mlib_v_ImageLookUpSI_S16_S32_4(const mlib_s16 *src,
                                    mlib_s32       slb,
                                    mlib_s32       *dst,
                                    mlib_s32       dlb,
                                    mlib_s32       xsize,
                                    mlib_s32       ysize,
                                    const mlib_s32 **table)
{
  const mlib_s32 *tab0 = &table[0][32768];
  const mlib_s32 *tab1 = &table[1][32768];
  const mlib_s32 *tab2 = &table[2][32768];
  const mlib_s32 *tab3 = &table[3][32768];
  const mlib_s16 *sl = src;
  mlib_s32 *dl = dst;
  mlib_s32 j;

  /* Without this guard the alignment step below would still convert one
     sample of a misaligned row even though the row is empty. */
  if (xsize <= 0) {
    return;
  }

  /* row loop */
  for (j = 0; j < ysize; j++) {
    const mlib_s16 *sp = sl;
    mlib_s32 *dp = dl;
    mlib_s32 size = xsize;

    /* The row kernel reads the source as 32-bit words: convert one sample
       here when the row does not start on a 4-byte boundary. */
    if (((mlib_addr) sp & 3) != 0) {
      mlib_s32 s0 = *sp++;

      *dp++ = tab0[s0];
      *dp++ = tab1[s0];
      *dp++ = tab2[s0];
      *dp++ = tab3[s0];
      size--;
    }

    if (size > 0) {
      mlib_v_ImageLookUpSI_S16_S32_4_D1(sp, (mlib_f32 *) dp, size, table);
    }

    sl = (const mlib_s16 *) ((const mlib_u8 *) sl + slb);
    dl = (mlib_s32 *) ((mlib_u8 *) dl + dlb);
  }
}
395 
396 /***************************************************************/
397