1 /*
2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 #if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27 
28 #include "vis_AlphaMacros.h"
29 
30 /***************************************************************/
31 
/*
 * Scalar byte shuffle for one 32-bit pixel: 0xAARRGGBB -> 0xGGBBGGRR.
 * The low 24 bits are the pixel with its R and B bytes exchanged; the top
 * byte ends up as a copy of the green byte, so a caller that needs a
 * specific top byte has to OR one in or mask it off itself.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be combined with any operator.  Note that x is evaluated three times.
 */
#define ARGB_to_GBGR(x)        \
    (((x) << 16) | ((x) & 0xff00) | (((x) >> 16) & 0xff))
34 
35 /***************************************************************/
36 
/*
 * Scalar byte shuffle for one 32-bit pixel: 0xAARRGGBB -> 0x00BBGGRR.
 * Same R/B exchange as ARGB_to_GBGR, but the top byte of the result is
 * always zero.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be combined with any operator.  Note that x is evaluated three times.
 */
#define ARGB_to_BGR(x)         \
    ((((x) << 16) & 0xff0000) | ((x) & 0xff00) | (((x) >> 16) & 0xff))
39 
40 /***************************************************************/
41 
/*
 * Assemble pixel number i of a packed 3-bytes-per-pixel row into one
 * integer: first byte in bits 16..23, second in bits 8..15, third in
 * bits 0..7.
 *
 * The macro reads a variable named `src` from the scope where it is
 * expanded; it must be a pointer to (or array of) unsigned bytes.
 * The index argument and the whole expansion are parenthesized so that
 * an expression such as READ_Bgr(a + b) addresses the intended pixel.
 */
#define READ_Bgr(i)    \
    ((src[3*(i)] << 16) | (src[3*(i) + 1] << 8) | src[3*(i) + 2])
44 
45 /***************************************************************/
46 
/*
 * Two-pixel vector form of ARGB_to_GBGR.
 *
 *   dst  - mlib_d64 lvalue that receives the two converted pixels
 *   src0 - first source pixel, as an mlib_f32
 *   src1 - second source pixel, as an mlib_f32
 *
 * The byte-lane comments below assume vis_fpmerge() interleaves the bytes
 * of its two 32-bit operands (a0 b0 a1 b1 a2 b2 a3 b3), as the VIS fpmerge
 * instruction does; that definition is not part of this file.
 *
 * Written as do { } while (0) so that it behaves as a single statement,
 * and with prefixed temporaries so that it cannot clash with the
 * identifiers a caller passes in.  Invoke it with a trailing semicolon.
 */
#define ARGB_to_GBGR_FL2(dst, src0, src1) do {                            \
    mlib_d64 gbgr_t0_, gbgr_t1_, gbgr_t2_;                                \
    /* A0 A1 R0 R1 | G0 G1 B0 B1 */                                       \
    gbgr_t0_ = vis_fpmerge(src0, src1);                                   \
    /* G0 A0 G1 A1 | B0 R0 B1 R1 */                                       \
    gbgr_t1_ = vis_fpmerge(vis_read_lo(gbgr_t0_), vis_read_hi(gbgr_t0_)); \
    /* G0 G0 G1 G1 | B0 B0 B1 B1 */                                       \
    gbgr_t2_ = vis_fpmerge(vis_read_lo(gbgr_t0_), vis_read_lo(gbgr_t0_)); \
    /* G0 B0 G0 R0 | G1 B1 G1 R1 */                                       \
    (dst) = vis_fpmerge(vis_read_hi(gbgr_t2_), vis_read_lo(gbgr_t1_));    \
} while (0)
54 
55 /***************************************************************/
56 
/*
 * Two-pixel vector form of ARGB_to_BGR: like the GBGR variant, except
 * that the top byte of each output pixel is zero.
 *
 *   dst  - mlib_d64 lvalue that receives the two converted pixels
 *   src0 - first source pixel, as an mlib_f32
 *   src1 - second source pixel, as an mlib_f32
 *
 * The byte-lane comments below assume vis_fpmerge() interleaves the bytes
 * of its two 32-bit operands (a0 b0 a1 b1 a2 b2 a3 b3), as the VIS fpmerge
 * instruction does; that definition is not part of this file.
 *
 * Written as do { } while (0) so that it behaves as a single statement,
 * and with prefixed temporaries so that it cannot clash with the
 * identifiers a caller passes in.  Invoke it with a trailing semicolon.
 */
#define ARGB_to_BGR_FL2(dst, src0, src1) do {                             \
    mlib_d64 bgr_t0_, bgr_t1_, bgr_t2_;                                   \
    /* A0 A1 R0 R1 | G0 G1 B0 B1 */                                       \
    bgr_t0_ = vis_fpmerge(src0, src1);                                    \
    /* G0 A0 G1 A1 | B0 R0 B1 R1 */                                       \
    bgr_t1_ = vis_fpmerge(vis_read_lo(bgr_t0_), vis_read_hi(bgr_t0_));    \
    /* 00 G0 00 G1 | 00 B0 00 B1 */                                       \
    bgr_t2_ = vis_fpmerge(vis_fzeros(),         vis_read_lo(bgr_t0_));    \
    /* 00 B0 G0 R0 | 00 B1 G1 R1 */                                       \
    (dst) = vis_fpmerge(vis_read_hi(bgr_t2_), vis_read_lo(bgr_t1_));      \
} while (0)
64 
65 /***************************************************************/
66 
ADD_SUFF(IntBgrToIntArgbConvert)67 void ADD_SUFF(IntBgrToIntArgbConvert)(BLIT_PARAMS)
68 {
69     mlib_s32 dstScan = pDstInfo->scanStride;
70     mlib_s32 srcScan = pSrcInfo->scanStride;
71     mlib_d64 dd, amask;
72     mlib_s32 i, i0, j, x;
73 
74     if (dstScan == 4*width && srcScan == 4*width) {
75         width *= height;
76         height = 1;
77     }
78 
79     amask = vis_to_double_dup(0xFF000000);
80     vis_alignaddr(NULL, 7);
81 
82     for (j = 0; j < height; j++) {
83         mlib_u32 *src = srcBase;
84         mlib_u32 *dst = dstBase;
85 
86         i = i0 = 0;
87 
88         if ((mlib_s32)dst & 7) {
89             x = src[i];
90             dst[i] = 0xff000000 | ARGB_to_GBGR(x);
91             i0 = 1;
92         }
93 
94 #pragma pipeloop(0)
95         for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
96             ARGB2ABGR_FL2(dd, ((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
97             *(mlib_d64*)(dst + i) = vis_for(dd, amask);
98         }
99 
100         if (i < width) {
101             x = src[i];
102             dst[i] = 0xff000000 | ARGB_to_GBGR(x);
103         }
104 
105         PTR_ADD(dstBase, dstScan);
106         PTR_ADD(srcBase, srcScan);
107     }
108 }
109 
110 /***************************************************************/
111 
ADD_SUFF(IntBgrToIntArgbScaleConvert)112 void ADD_SUFF(IntBgrToIntArgbScaleConvert)(SCALE_PARAMS)
113 {
114     mlib_s32 dstScan = pDstInfo->scanStride;
115     mlib_s32 srcScan = pSrcInfo->scanStride;
116     mlib_d64 dd, amask;
117     mlib_s32 j, x;
118 
119     amask = vis_to_double_dup(0xFF000000);
120     vis_alignaddr(NULL, 7);
121 
122     for (j = 0; j < height; j++) {
123         mlib_u32 *src = srcBase;
124         mlib_u32 *dst = dstBase;
125         mlib_u32 *dst_end = dst + width;
126         mlib_s32 tmpsxloc = sxloc;
127 
128         PTR_ADD(src, (syloc >> shift) * srcScan);
129 
130         if ((mlib_s32)dst & 7) {
131             x = src[tmpsxloc >> shift];
132             *dst++ = 0xff000000 | ARGB_to_GBGR(x);
133             tmpsxloc += sxinc;
134         }
135 
136 #pragma pipeloop(0)
137         for (; dst <= dst_end - 2; dst += 2) {
138             ARGB2ABGR_FL2(dd, ((mlib_f32*)src)[tmpsxloc >> shift],
139                               ((mlib_f32*)src)[(tmpsxloc + sxinc) >> shift]);
140             *(mlib_d64*)dst = vis_for(dd, amask);
141             tmpsxloc += 2*sxinc;
142         }
143 
144         for (; dst < dst_end; dst++) {
145             x = src[tmpsxloc >> shift];
146             *dst++ = 0xff000000 | ARGB_to_GBGR(x);
147             tmpsxloc += sxinc;
148         }
149 
150         PTR_ADD(dstBase, dstScan);
151         syloc += syinc;
152     }
153 }
154 
155 /***************************************************************/
156 
ADD_SUFF(IntArgbToIntBgrConvert)157 void ADD_SUFF(IntArgbToIntBgrConvert)(BLIT_PARAMS)
158 {
159     mlib_s32 dstScan = pDstInfo->scanStride;
160     mlib_s32 srcScan = pSrcInfo->scanStride;
161     mlib_d64 dd;
162     mlib_s32 i, i0, j, x;
163 
164     if (dstScan == 4*width && srcScan == 4*width) {
165         width *= height;
166         height = 1;
167     }
168 
169     for (j = 0; j < height; j++) {
170         mlib_u32 *src = srcBase;
171         mlib_u32 *dst = dstBase;
172 
173         i = i0 = 0;
174 
175         if ((mlib_s32)dst & 7) {
176             x = src[i];
177             dst[i] = ARGB_to_GBGR(x);
178             i0 = 1;
179         }
180 
181 #pragma pipeloop(0)
182         for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
183             ARGB_to_GBGR_FL2(dd, ((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
184             *(mlib_d64*)(dst + i) = dd;
185         }
186 
187         if (i < width) {
188             x = src[i];
189             dst[i] = ARGB_to_GBGR(x);
190         }
191 
192         PTR_ADD(dstBase, dstScan);
193         PTR_ADD(srcBase, srcScan);
194     }
195 }
196 
197 /***************************************************************/
198 
ADD_SUFF(IntArgbToIntBgrScaleConvert)199 void ADD_SUFF(IntArgbToIntBgrScaleConvert)(SCALE_PARAMS)
200 {
201     mlib_s32 dstScan = pDstInfo->scanStride;
202     mlib_s32 srcScan = pSrcInfo->scanStride;
203     mlib_d64 dd;
204     mlib_s32 j, x;
205 
206     for (j = 0; j < height; j++) {
207         mlib_u32 *src = srcBase;
208         mlib_u32 *dst = dstBase;
209         mlib_u32 *dst_end = dst + width;
210         mlib_s32 tmpsxloc = sxloc;
211 
212         PTR_ADD(src, (syloc >> shift) * srcScan);
213 
214         if ((mlib_s32)dst & 7) {
215             x = src[tmpsxloc >> shift];
216             *dst++ = ARGB_to_GBGR(x);
217             tmpsxloc += sxinc;
218         }
219 
220 #pragma pipeloop(0)
221         for (; dst <= dst_end - 2; dst += 2) {
222             ARGB_to_GBGR_FL2(dd, ((mlib_f32*)src)[tmpsxloc >> shift],
223                                  ((mlib_f32*)src)[(tmpsxloc + sxinc) >> shift]);
224             *(mlib_d64*)dst = dd;
225             tmpsxloc += 2*sxinc;
226         }
227 
228         for (; dst < dst_end; dst++) {
229             x = src[tmpsxloc >> shift];
230             *dst++ = ARGB_to_GBGR(x);
231             tmpsxloc += sxinc;
232         }
233 
234         PTR_ADD(dstBase, dstScan);
235         syloc += syinc;
236     }
237 }
238 
239 /***************************************************************/
240 
/*
 * Expand eight packed 3-byte pixels into eight 4-byte pixels, putting one
 * byte taken from sFF in front of every pixel.
 *
 * The macro takes no arguments; it works on variables of the enclosing
 * scope:
 *   in:  sd0, sd1, sd2       - the 24 source bytes, in order
 *        sFF                 - supplies the inserted byte for every pixel
 *   out: dd0, dd1, dd2, dd3  - two expanded pixels each
 *
 * It expands to a bare { } block and is invoked without a semicolon.
 * The per-stage comments assume vis_fpmerge() interleaves the bytes of
 * its two 32-bit operands, as the VIS fpmerge instruction does; that
 * definition is not part of this file.
 */
#define INSERT_U8_34R {                                                   \
    mlib_d64 ins_r1a, ins_r1b, ins_r1c;                                   \
    mlib_d64 ins_r2a, ins_r2b, ins_r2c;                                   \
    mlib_d64 ins_p0, ins_p1, ins_p2;                                      \
    mlib_d64 ins_p02h, ins_p02l, ins_f1h, ins_f1l;                        \
                                                                          \
    /* Two shuffle rounds over the 24 input bytes. */                     \
    ins_r1a = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));            \
    ins_r1b = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));            \
    ins_r1c = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));            \
    ins_r2a = vis_fpmerge(vis_read_hi(ins_r1a), vis_read_lo(ins_r1b));    \
    ins_r2b = vis_fpmerge(vis_read_lo(ins_r1a), vis_read_hi(ins_r1c));    \
    ins_r2c = vis_fpmerge(vis_read_hi(ins_r1b), vis_read_lo(ins_r1c));    \
    /* Third round: ins_pN holds byte N of each of the eight pixels. */   \
    ins_p0 = vis_fpmerge(vis_read_hi(ins_r2a), vis_read_lo(ins_r2b));     \
    ins_p1 = vis_fpmerge(vis_read_lo(ins_r2a), vis_read_hi(ins_r2c));     \
    ins_p2 = vis_fpmerge(vis_read_hi(ins_r2b), vis_read_lo(ins_r2c));     \
    /* Pair bytes 0 and 2, and the fill byte with byte 1. */              \
    ins_p02h = vis_fpmerge(vis_read_hi(ins_p0), vis_read_hi(ins_p2));     \
    ins_p02l = vis_fpmerge(vis_read_lo(ins_p0), vis_read_lo(ins_p2));     \
    ins_f1h = vis_fpmerge(vis_read_hi(sFF), vis_read_hi(ins_p1));         \
    ins_f1l = vis_fpmerge(vis_read_lo(sFF), vis_read_lo(ins_p1));         \
    /* Final interleave: fill, byte 0, byte 1, byte 2 for each pixel. */  \
    dd0 = vis_fpmerge(vis_read_hi(ins_f1h), vis_read_hi(ins_p02h));       \
    dd1 = vis_fpmerge(vis_read_lo(ins_f1h), vis_read_lo(ins_p02h));       \
    dd2 = vis_fpmerge(vis_read_hi(ins_f1l), vis_read_hi(ins_p02l));       \
    dd3 = vis_fpmerge(vis_read_lo(ins_f1l), vis_read_lo(ins_p02l));       \
}
265 
266 /***************************************************************/
267 
ADD_SUFF(ThreeByteBgrToIntBgrConvert)268 void ADD_SUFF(ThreeByteBgrToIntBgrConvert)(BLIT_PARAMS)
269 {
270     mlib_s32 dstScan = pDstInfo->scanStride;
271     mlib_s32 srcScan = pSrcInfo->scanStride;
272     mlib_d64 *sp;
273     mlib_d64 sFF;
274     mlib_d64 s0, s1, s2, s3, sd0, sd1, sd2, dd0, dd1, dd2, dd3;
275     mlib_s32 i, i0, j;
276 
277     if (width < 16) {
278         for (j = 0; j < height; j++) {
279             mlib_u8  *src = srcBase;
280             mlib_u32 *dst = dstBase;
281 
282             for (i = 0; i < width; i++) {
283                 dst[i] = READ_Bgr(i);
284             }
285 
286             PTR_ADD(dstBase, dstScan);
287             PTR_ADD(srcBase, srcScan);
288         }
289         return;
290     }
291 
292     if (srcScan == 3*width && dstScan == 4*width) {
293         width *= height;
294         height = 1;
295     }
296 
297     sFF = vis_fzero();
298 
299     for (j = 0; j < height; j++) {
300         mlib_u8  *src = srcBase;
301         mlib_f32 *dst = dstBase;
302 
303         i = i0 = 0;
304 
305         if ((mlib_s32)dst & 7) {
306             ((mlib_s32*)dst)[i] = READ_Bgr(i);
307             i0 = 1;
308         }
309 
310         sp = vis_alignaddr(src, 3*i0);
311         s3 = *sp++;
312 
313 #pragma pipeloop(0)
314         for (i = i0; i <= (mlib_s32)width - 8; i += 8) {
315             s0 = s3;
316             s1 = *sp++;
317             s2 = *sp++;
318             s3 = *sp++;
319             sd0 = vis_faligndata(s0, s1);
320             sd1 = vis_faligndata(s1, s2);
321             sd2 = vis_faligndata(s2, s3);
322 
323             INSERT_U8_34R
324 
325             *(mlib_d64*)(dst + i    ) = dd0;
326             *(mlib_d64*)(dst + i + 2) = dd1;
327             *(mlib_d64*)(dst + i + 4) = dd2;
328             *(mlib_d64*)(dst + i + 6) = dd3;
329         }
330 
331         for (; i < width; i++) {
332             ((mlib_s32*)dst)[i] = READ_Bgr(i);
333         }
334 
335         PTR_ADD(dstBase, dstScan);
336         PTR_ADD(srcBase, srcScan);
337     }
338 }
339 
340 /***************************************************************/
341 
ADD_SUFF(ThreeByteBgrToIntBgrScaleConvert)342 void ADD_SUFF(ThreeByteBgrToIntBgrScaleConvert)(SCALE_PARAMS)
343 {
344     mlib_s32 dstScan = pDstInfo->scanStride;
345     mlib_s32 srcScan = pSrcInfo->scanStride;
346     mlib_d64 dd, dzero;
347     mlib_s32 i, i0, i1, j;
348 
349     if (width < 16) {
350         for (j = 0; j < height; j++) {
351             mlib_u8  *src = srcBase;
352             mlib_s32 *dst = dstBase;
353             mlib_s32 *dst_end = dst + width;
354             mlib_s32 tmpsxloc = sxloc;
355 
356             PTR_ADD(src, (syloc >> shift) * srcScan);
357 
358             for (; dst < dst_end; dst++) {
359                 i = tmpsxloc >> shift;
360                 tmpsxloc += sxinc;
361                 *(mlib_s32*)dst = READ_Bgr(i);
362             }
363 
364             PTR_ADD(dstBase, dstScan);
365             syloc += syinc;
366         }
367         return;
368     }
369 
370     dzero = vis_fzero();
371 
372     vis_alignaddr(NULL, 7);
373 
374     for (j = 0; j < height; j++) {
375         mlib_u8  *src = srcBase;
376         mlib_f32 *dst = dstBase;
377         mlib_f32 *dst_end = dst + width;
378         mlib_s32 tmpsxloc = sxloc;
379 
380         PTR_ADD(src, (syloc >> shift) * srcScan);
381 
382         if ((mlib_s32)dst & 7) {
383             i = tmpsxloc >> shift;
384             tmpsxloc += sxinc;
385             *(mlib_s32*)dst = READ_Bgr(i);
386             dst++;
387         }
388 
389 #pragma pipeloop(0)
390         for (; dst <= dst_end - 2; dst += 2) {
391             i0 = tmpsxloc >> shift;
392             i1 = (tmpsxloc + sxinc) >> shift;
393             tmpsxloc += 2*sxinc;
394 
395             dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 2), dd);
396             dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 1), dd);
397             dd = vis_faligndata(vis_ld_u8(src + 3*i1    ), dd);
398             dd = vis_faligndata(dzero, dd);
399             dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 2), dd);
400             dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 1), dd);
401             dd = vis_faligndata(vis_ld_u8(src + 3*i0    ), dd);
402             dd = vis_faligndata(dzero, dd);
403 
404             *(mlib_d64*)dst = dd;
405         }
406 
407         for (; dst < dst_end; dst++) {
408             i = tmpsxloc >> shift;
409             tmpsxloc += sxinc;
410             *(mlib_s32*)dst = READ_Bgr(i);
411         }
412 
413         PTR_ADD(dstBase, dstScan);
414         syloc += syinc;
415     }
416 }
417 
418 /***************************************************************/
419 
ADD_SUFF(IntArgbBmToIntBgrXparOver)420 void ADD_SUFF(IntArgbBmToIntBgrXparOver)(BLIT_PARAMS)
421 {
422     mlib_s32 dstScan = pDstInfo->scanStride;
423     mlib_s32 srcScan = pSrcInfo->scanStride;
424     mlib_d64 dd;
425     mlib_s32 i, i0, j, mask, x;
426 
427     if (dstScan == 4*width && srcScan == 4*width) {
428         width *= height;
429         height = 1;
430     }
431 
432     for (j = 0; j < height; j++) {
433         mlib_s32 *src = srcBase;
434         mlib_s32 *dst = dstBase;
435 
436         i = i0 = 0;
437 
438         if ((mlib_s32)dst & 7) {
439             if (*(mlib_u8*)(src + i)) {
440                 x = src[i];
441                 dst[i] = ARGB_to_GBGR(x);
442             }
443             i0 = 1;
444         }
445 
446 #pragma pipeloop(0)
447         for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
448             ARGB_to_GBGR_FL2(dd, ((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
449             mask = (((-*(mlib_u8*)(src + i)) >> 31) & 2) |
450                    (((-*(mlib_u8*)(src + i + 1)) >> 31) & 1);
451             vis_pst_32(dd, dst + i, mask);
452         }
453 
454         if (i < width) {
455             if (*(mlib_u8*)(src + i)) {
456                 x = src[i];
457                 dst[i] = ARGB_to_GBGR(x);
458             }
459         }
460 
461         PTR_ADD(dstBase, dstScan);
462         PTR_ADD(srcBase, srcScan);
463     }
464 }
465 
466 /***************************************************************/
467 
ADD_SUFF(IntArgbBmToIntBgrScaleXparOver)468 void ADD_SUFF(IntArgbBmToIntBgrScaleXparOver)(SCALE_PARAMS)
469 {
470     mlib_s32 dstScan = pDstInfo->scanStride;
471     mlib_s32 srcScan = pSrcInfo->scanStride;
472     mlib_d64 dd;
473     mlib_s32 j, mask;
474 
475     for (j = 0; j < height; j++) {
476         mlib_s32 *src = srcBase;
477         mlib_s32 *dst = dstBase;
478         mlib_s32 *dst_end = dst + width;
479         mlib_s32 tmpsxloc = sxloc;
480 
481         PTR_ADD(src, (syloc >> shift) * srcScan);
482 
483         if ((mlib_s32)dst & 7) {
484             mlib_s32 *pp = src + (tmpsxloc >> shift);
485             if (*(mlib_u8*)pp) {
486                 *dst = ARGB_to_GBGR(*pp);
487             }
488             dst++;
489             tmpsxloc += sxinc;
490         }
491 
492 #pragma pipeloop(0)
493         for (; dst <= dst_end - 2; dst += 2) {
494             mlib_s32 *pp0 = src + (tmpsxloc >> shift);
495             mlib_s32 *pp1 = src + ((tmpsxloc + sxinc) >> shift);
496             ARGB_to_GBGR_FL2(dd, *(mlib_f32*)pp0, *(mlib_f32*)pp1);
497             mask = (((-*(mlib_u8*)pp0) >> 31) & 2) |
498                    ((mlib_u32)(-*(mlib_u8*)pp1) >> 31);
499             vis_pst_32(dd, dst, mask);
500             tmpsxloc += 2*sxinc;
501         }
502 
503         for (; dst < dst_end; dst++) {
504             mlib_s32 *pp = src + (tmpsxloc >> shift);
505             if (*(mlib_u8*)pp) {
506                 *dst = ARGB_to_GBGR(*pp);
507             }
508         }
509 
510         PTR_ADD(dstBase, dstScan);
511         syloc += syinc;
512     }
513 }
514 
515 /***************************************************************/
516 
ADD_SUFF(IntArgbBmToIntBgrXparBgCopy)517 void ADD_SUFF(IntArgbBmToIntBgrXparBgCopy)(BCOPY_PARAMS)
518 {
519     mlib_s32 dstScan = pDstInfo->scanStride;
520     mlib_s32 srcScan = pSrcInfo->scanStride;
521     mlib_d64 dd, d_bgpixel;
522     mlib_s32 i, i0, j, mask;
523 
524     if (dstScan == 4*width && srcScan == 4*width) {
525         width *= height;
526         height = 1;
527     }
528 
529     vis_alignaddr(NULL, 1);
530     d_bgpixel = vis_to_double_dup(bgpixel);
531 
532     for (j = 0; j < height; j++) {
533         mlib_s32 *src = srcBase;
534         mlib_s32 *dst = dstBase;
535 
536         i = i0 = 0;
537 
538         if ((mlib_s32)dst & 7) {
539             if (*(mlib_u8*)(src + i)) {
540                 dst[i] = ARGB_to_GBGR(src[i]);
541             } else {
542                 dst[i] = bgpixel;
543             }
544             i0 = 1;
545         }
546 
547 #pragma pipeloop(0)
548         for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
549             ARGB_to_GBGR_FL2(dd, ((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
550             mask = (((-*(mlib_u8*)(src + i)) >> 31) & 2) |
551                    (((-*(mlib_u8*)(src + i + 1)) >> 31) & 1);
552             *(mlib_d64*)(dst + i) = d_bgpixel;
553             vis_pst_32(dd, dst + i, mask);
554         }
555 
556         if (i < width) {
557             if (*(mlib_u8*)(src + i)) {
558                 dst[i] = ARGB_to_GBGR(src[i]);
559             } else {
560                 dst[i] = bgpixel;
561             }
562         }
563 
564         PTR_ADD(dstBase, dstScan);
565         PTR_ADD(srcBase, srcScan);
566     }
567 }
568 
569 /***************************************************************/
570 
ADD_SUFF(ByteIndexedToIntBgrConvert)571 void ADD_SUFF(ByteIndexedToIntBgrConvert)(BLIT_PARAMS)
572 {
573     jint *pixLut = pSrcInfo->lutBase;
574     mlib_s32 dstScan = pDstInfo->scanStride;
575     mlib_s32 srcScan = pSrcInfo->scanStride;
576     mlib_d64 dd;
577     mlib_s32 i, i0, j, x;
578 
579     if (srcScan == width && dstScan == 4*width) {
580         width *= height;
581         height = 1;
582     }
583 
584     for (j = 0; j < height; j++) {
585         mlib_u8  *src = srcBase;
586         mlib_s32 *dst = dstBase;
587 
588         i = i0 = 0;
589 
590         if ((mlib_s32)dst & 7) {
591             x = pixLut[src[i]];
592             dst[i] = ARGB_to_GBGR(x);
593             i0 = 1;
594         }
595 
596 #pragma pipeloop(0)
597         for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
598             ARGB_to_GBGR_FL2(dd, ((mlib_f32*)pixLut)[src[i]],
599                                  ((mlib_f32*)pixLut)[src[i + 1]]);
600             *(mlib_d64*)(dst + i) = dd;
601         }
602 
603         for (; i < width; i++) {
604             x = pixLut[src[i]];
605             dst[i] = ARGB_to_GBGR(x);
606         }
607 
608         PTR_ADD(dstBase, dstScan);
609         PTR_ADD(srcBase, srcScan);
610     }
611 }
612 
613 /***************************************************************/
614 
ADD_SUFF(ByteIndexedToIntBgrScaleConvert)615 void ADD_SUFF(ByteIndexedToIntBgrScaleConvert)(SCALE_PARAMS)
616 {
617     jint *pixLut = pSrcInfo->lutBase;
618     mlib_s32 dstScan = pDstInfo->scanStride;
619     mlib_s32 srcScan = pSrcInfo->scanStride;
620     mlib_d64 dd;
621     mlib_s32 j, x;
622 
623     for (j = 0; j < height; j++) {
624         mlib_u8  *src = srcBase;
625         mlib_s32 *dst = dstBase;
626         mlib_s32 *dst_end = dst + width;
627         mlib_s32 tmpsxloc = sxloc;
628 
629         PTR_ADD(src, (syloc >> shift) * srcScan);
630 
631         if ((mlib_s32)dst & 7) {
632             x = pixLut[src[tmpsxloc >> shift]];
633             *dst++ = ARGB_to_GBGR(x);
634             tmpsxloc += sxinc;
635         }
636 
637 #pragma pipeloop(0)
638         for (; dst <= dst_end - 2; dst += 2) {
639             mlib_f32 f0 = ((mlib_f32*)pixLut)[src[tmpsxloc >> shift]];
640             mlib_f32 f1 = ((mlib_f32*)pixLut)[src[(tmpsxloc + sxinc) >> shift]];
641             ARGB_to_GBGR_FL2(dd, f0, f1);
642             *(mlib_d64*)dst = dd;
643             tmpsxloc += 2*sxinc;
644         }
645 
646         for (; dst < dst_end; dst++) {
647             x = pixLut[src[tmpsxloc >> shift]];
648             *dst++ = ARGB_to_GBGR(x);
649             tmpsxloc += sxinc;
650         }
651 
652         PTR_ADD(dstBase, dstScan);
653         syloc += syinc;
654     }
655 }
656 
657 /***************************************************************/
658 
ADD_SUFF(ByteIndexedBmToIntBgrXparOver)659 void ADD_SUFF(ByteIndexedBmToIntBgrXparOver)(BLIT_PARAMS)
660 {
661     jint *pixLut = pSrcInfo->lutBase;
662     mlib_s32 dstScan = pDstInfo->scanStride;
663     mlib_s32 srcScan = pSrcInfo->scanStride;
664     mlib_d64 dd;
665     mlib_s32 i, i0, j, x, mask;
666 
667     if (srcScan == width && dstScan == 4*width) {
668         width *= height;
669         height = 1;
670     }
671 
672     for (j = 0; j < height; j++) {
673         mlib_u8  *src = srcBase;
674         mlib_s32 *dst = dstBase;
675 
676         i = i0 = 0;
677 
678         if ((mlib_s32)dst & 7) {
679             x = pixLut[src[i]];
680             if (x < 0) {
681                 dst[i] = ARGB_to_BGR(x);
682             }
683             i0 = 1;
684         }
685 
686 #pragma pipeloop(0)
687         for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
688             mlib_f32 *pp0 = (mlib_f32*)pixLut + src[i];
689             mlib_f32 *pp1 = (mlib_f32*)pixLut + src[i + 1];
690             ARGB_to_BGR_FL2(dd, *pp0, *pp1);
691             mask = (((*(mlib_u8*)pp0) >> 6) & 2) | ((*(mlib_u8*)pp1) >> 7);
692             vis_pst_32(dd, dst + i, mask);
693         }
694 
695         for (; i < width; i++) {
696             x = pixLut[src[i]];
697             if (x < 0) {
698                 dst[i] = ARGB_to_BGR(x);
699             }
700         }
701 
702         PTR_ADD(dstBase, dstScan);
703         PTR_ADD(srcBase, srcScan);
704     }
705 }
706 
707 /***************************************************************/
708 
ADD_SUFF(ByteIndexedBmToIntBgrScaleXparOver)709 void ADD_SUFF(ByteIndexedBmToIntBgrScaleXparOver)(SCALE_PARAMS)
710 {
711     jint *pixLut = pSrcInfo->lutBase;
712     mlib_s32 dstScan = pDstInfo->scanStride;
713     mlib_s32 srcScan = pSrcInfo->scanStride;
714     mlib_d64 dd;
715     mlib_s32 j, x, mask;
716 
717     for (j = 0; j < height; j++) {
718         mlib_u8  *src = srcBase;
719         mlib_s32 *dst = dstBase;
720         mlib_s32 *dst_end = dst + width;
721         mlib_s32 tmpsxloc = sxloc;
722 
723         PTR_ADD(src, (syloc >> shift) * srcScan);
724 
725         if ((mlib_s32)dst & 7) {
726             x = pixLut[src[tmpsxloc >> shift]];
727             tmpsxloc += sxinc;
728             if (x < 0) {
729                 *dst = ARGB_to_BGR(x);
730             }
731             dst++;
732         }
733 
734 #pragma pipeloop(0)
735         for (; dst <= dst_end - 2; dst += 2) {
736             mlib_f32 *p0 = (mlib_f32*)pixLut + src[tmpsxloc >> shift];
737             mlib_f32 *p1 = (mlib_f32*)pixLut + src[(tmpsxloc + sxinc) >> shift];
738             ARGB_to_BGR_FL2(dd, *p0, *p1);
739             mask = (((*(mlib_u8*)p0) >> 6) & 2) | ((*(mlib_u8*)p1) >> 7);
740             tmpsxloc += 2*sxinc;
741             vis_pst_32(dd, dst, mask);
742         }
743 
744         for (; dst < dst_end; dst++) {
745             x = pixLut[src[tmpsxloc >> shift]];
746             tmpsxloc += sxinc;
747             if (x < 0) {
748                 *dst = ARGB_to_BGR(x);
749             }
750         }
751 
752         PTR_ADD(dstBase, dstScan);
753         syloc += syinc;
754     }
755 }
756 
757 /***************************************************************/
758 
ADD_SUFF(ByteIndexedBmToIntBgrXparBgCopy)759 void ADD_SUFF(ByteIndexedBmToIntBgrXparBgCopy)(BCOPY_PARAMS)
760 {
761     jint *pixLut = pSrcInfo->lutBase;
762     mlib_s32 dstScan = pDstInfo->scanStride;
763     mlib_s32 srcScan = pSrcInfo->scanStride;
764     mlib_d64 dd, d_bgpixel;
765     mlib_s32 j, x, mask;
766 
767     if (srcScan == width && dstScan == 4*width) {
768         width *= height;
769         height = 1;
770     }
771 
772     d_bgpixel = vis_to_double_dup(bgpixel);
773 
774     for (j = 0; j < height; j++) {
775         mlib_u8  *src = srcBase;
776         mlib_s32 *dst = dstBase;
777         mlib_s32 *dst_end;
778 
779         dst_end = dst + width;
780 
781         if ((mlib_s32)dst & 7) {
782             x = pixLut[*src++];
783             if (x < 0) {
784                 *dst = ARGB_to_GBGR(x);
785             } else {
786                 *dst = bgpixel;
787             }
788             dst++;
789         }
790 
791 #pragma pipeloop(0)
792         for (; dst <= (dst_end - 2); dst += 2) {
793             mlib_f32 *pp0 = (mlib_f32*)pixLut + src[0];
794             mlib_f32 *pp1 = (mlib_f32*)pixLut + src[1];
795             ARGB_to_GBGR_FL2(dd, *pp0, *pp1);
796             mask = (((*(mlib_u8*)pp0) >> 6) & 2) | ((*(mlib_u8*)pp1) >> 7);
797             *(mlib_d64*)dst = d_bgpixel;
798             vis_pst_32(dd, dst, mask);
799             src += 2;
800         }
801 
802         while (dst < dst_end) {
803             x = pixLut[*src++];
804             if (x < 0) {
805                 *dst = ARGB_to_GBGR(x);
806             } else {
807                 *dst = bgpixel;
808             }
809             dst++;
810         }
811 
812         PTR_ADD(dstBase, dstScan);
813         PTR_ADD(srcBase, srcScan);
814     }
815 }
816 
817 /***************************************************************/
818 
ADD_SUFF(IntBgrDrawGlyphListAA)819 void ADD_SUFF(IntBgrDrawGlyphListAA)(GLYPH_LIST_PARAMS)
820 {
821     mlib_s32 glyphCounter;
822     mlib_s32 scan = pRasInfo->scanStride;
823     mlib_u8  *dstBase;
824     mlib_s32 j;
825     mlib_d64 dmix0, dmix1, dd, d0, d1, e0, e1, fgpixel_d;
826     mlib_d64 done, done16, d_half, maskRGB, dzero;
827     mlib_s32 pix, mask, mask_z;
828     mlib_f32 srcG_f;
829 
830     done = vis_to_double_dup(0x7fff7fff);
831     done16 = vis_to_double_dup(0x7fff);
832     d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));
833 
834     fgpixel_d = vis_to_double_dup(fgpixel);
835     srcG_f = vis_to_float(argbcolor);
836     maskRGB = vis_to_double_dup(0xffffff);
837     dzero = vis_fzero();
838 
839     ARGB2ABGR_FL(srcG_f)
840 
841     vis_write_gsr(0 << 3);
842 
843     for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {
844         const jubyte *pixels;
845         unsigned int rowBytes;
846         int left, top;
847         int width, height;
848         int right, bottom;
849 
850         pixels = (const jubyte *) glyphs[glyphCounter].pixels;
851 
852         if (!pixels) continue;
853 
854         left = glyphs[glyphCounter].x;
855         top = glyphs[glyphCounter].y;
856         width = glyphs[glyphCounter].width;
857         height = glyphs[glyphCounter].height;
858         rowBytes = width;
859         right = left + width;
860         bottom = top + height;
861         if (left < clipLeft) {
862             pixels += clipLeft - left;
863             left = clipLeft;
864         }
865         if (top < clipTop) {
866             pixels += (clipTop - top) * rowBytes;
867             top = clipTop;
868         }
869         if (right > clipRight) {
870             right = clipRight;
871         }
872         if (bottom > clipBottom) {
873             bottom = clipBottom;
874         }
875         if (right <= left || bottom <= top) {
876             continue;
877         }
878         width = right - left;
879         height = bottom - top;
880 
881         dstBase = pRasInfo->rasBase;
882         PTR_ADD(dstBase, top*scan + 4*left);
883 
884         for (j = 0; j < height; j++) {
885             mlib_u8  *src = (void*)pixels;
886             mlib_s32 *dst, *dst_end;
887 
888             dst = (void*)dstBase;
889             dst_end = dst + width;
890 
891             if ((mlib_s32)dst & 7) {
892                 pix = *src++;
893                 if (pix) {
894                     dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
895                     dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
896                     *(mlib_f32*)dst = vis_fands(vis_fpack16(dd),
897                                                 vis_read_hi(maskRGB));
898                     if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
899                 }
900                 dst++;
901             }
902 
903 #pragma pipeloop(0)
904             for (; dst <= (dst_end - 2); dst += 2) {
905                 dmix0 = vis_freg_pair(((mlib_f32 *)vis_mul8s_tbl)[src[0]],
906                                       ((mlib_f32 *)vis_mul8s_tbl)[src[1]]);
907                 mask = vis_fcmplt32(dmix0, done16);
908                 mask_z = vis_fcmpne32(dmix0, dzero);
909                 dmix1 = vis_fpsub16(done, dmix0);
910                 src += 2;
911 
912                 dd = *(mlib_d64*)dst;
913                 d0 = vis_fmul8x16al(srcG_f, vis_read_hi(dmix0));
914                 d1 = vis_fmul8x16al(srcG_f, vis_read_lo(dmix0));
915                 e0 = vis_fmul8x16al(vis_read_hi(dd), vis_read_hi(dmix1));
916                 e1 = vis_fmul8x16al(vis_read_lo(dd), vis_read_lo(dmix1));
917                 d0 = vis_fpadd16(vis_fpadd16(d0, d_half), e0);
918                 d1 = vis_fpadd16(vis_fpadd16(d1, d_half), e1);
919                 dd = vis_fpack16_pair(d0, d1);
920                 dd = vis_fand(dd, maskRGB);
921 
922                 vis_pst_32(fgpixel_d, dst, mask_z);
923                 vis_pst_32(dd, dst, mask & mask_z);
924             }
925 
926             while (dst < dst_end) {
927                 pix = *src++;
928                 if (pix) {
929                     dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
930                     dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
931                     *(mlib_f32*)dst = vis_fands(vis_fpack16(dd),
932                                                 vis_read_hi(maskRGB));
933                     if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
934                 }
935                 dst++;
936             }
937 
938             PTR_ADD(dstBase, scan);
939             pixels += rowBytes;
940         }
941     }
942 }
943 
944 /***************************************************************/
945 
946 #endif /* JAVA2D_NO_MLIB */
947