1 /*
2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 #if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27
28 #include "vis_AlphaMacros.h"
29
30 /***************************************************************/
31
/*
 * Replicate a gray level into the three upper bytes of a 32-bit RGBx
 * word, leaving the low (x) byte zero: 0xGGGGGG00.
 *
 * The argument is expected to be a byte value (the callers in this file
 * pass a value just read from an mlib_u8 source).  It is evaluated three
 * times, so it must not have side effects.
 *
 * The argument and the whole expansion are parenthesized so the macro is
 * safe inside larger expressions, and the shifts are done on an unsigned
 * value so that a gray level >= 0x80 does not overflow a signed int.
 */
#define Gray2RGBx(x)                    \
    (((unsigned int)(x) << 24) |        \
     ((unsigned int)(x) << 16) |        \
     ((unsigned int)(x) <<  8))
34
35 /***************************************************************/
36
/*
 * Build one 32-bit RGBx word from the i-th 3-byte pixel of the byte
 * array named `src`, which must be in scope at the point of use:
 *
 *     src[3*i + 2] -> bits 31..24
 *     src[3*i + 1] -> bits 23..16
 *     src[3*i    ] -> bits 15..8
 *     bits 7..0 are zero
 *
 * The index argument is evaluated three times, so it must not have side
 * effects.  It is parenthesized so that expressions such as `k + 1` index
 * the intended pixel, and the bytes are widened to unsigned before the
 * shift so that a byte >= 0x80 shifted by 24 does not overflow signed int.
 */
#define READ_RGBx(i)                                \
    (((unsigned int)src[3*(i) + 2] << 24) |         \
     ((unsigned int)src[3*(i) + 1] << 16) |         \
     ((unsigned int)src[3*(i)    ] <<  8))
39
40 /***************************************************************/
41
ADD_SUFF(IntRgbxToIntArgbConvert)42 void ADD_SUFF(IntRgbxToIntArgbConvert)(BLIT_PARAMS)
43 {
44 mlib_s32 dstScan = pDstInfo->scanStride;
45 mlib_s32 srcScan = pSrcInfo->scanStride;
46 mlib_d64 dd, mask;
47 mlib_s32 i, i0, j;
48
49 if (dstScan == 4*width && srcScan == 4*width) {
50 width *= height;
51 height = 1;
52 }
53
54 mask = vis_to_double_dup(0xFF000000);
55 vis_alignaddr(NULL, 7);
56
57 for (j = 0; j < height; j++) {
58 mlib_u32 *src = srcBase;
59 mlib_u32 *dst = dstBase;
60
61 i = i0 = 0;
62
63 if ((mlib_s32)dst & 7) {
64 dst[i] = 0xff000000 | (src[i] >> 8);
65 i0 = 1;
66 }
67
68 #pragma pipeloop(0)
69 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
70 dd = vis_freg_pair(((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
71 dd = vis_faligndata(dd, dd);
72 *(mlib_d64*)(dst + i) = vis_for(dd, mask);
73 }
74
75 if (i < width) {
76 dst[i] = 0xff000000 | (src[i] >> 8);
77 }
78
79 PTR_ADD(dstBase, dstScan);
80 PTR_ADD(srcBase, srcScan);
81 }
82 }
83
84 /***************************************************************/
85
ADD_SUFF(IntRgbxToIntArgbScaleConvert)86 void ADD_SUFF(IntRgbxToIntArgbScaleConvert)(SCALE_PARAMS)
87 {
88 mlib_s32 dstScan = pDstInfo->scanStride;
89 mlib_s32 srcScan = pSrcInfo->scanStride;
90 mlib_d64 dd, mask;
91 mlib_s32 j;
92
93 mask = vis_to_double_dup(0xFF000000);
94 vis_alignaddr(NULL, 7);
95
96 for (j = 0; j < height; j++) {
97 mlib_u32 *src = srcBase;
98 mlib_u32 *dst = dstBase;
99 mlib_u32 *dst_end = dst + width;
100 mlib_s32 tmpsxloc = sxloc;
101
102 PTR_ADD(src, (syloc >> shift) * srcScan);
103
104 if ((mlib_s32)dst & 7) {
105 *dst++ = 0xff000000 | (src[tmpsxloc >> shift] >> 8);
106 tmpsxloc += sxinc;
107 }
108
109 #pragma pipeloop(0)
110 for (; dst <= dst_end - 2; dst += 2) {
111 dd = vis_freg_pair(((mlib_f32*)src)[tmpsxloc >> shift],
112 ((mlib_f32*)src)[(tmpsxloc + sxinc) >> shift]);
113 dd = vis_faligndata(dd, dd);
114 *(mlib_d64*)dst = vis_for(dd, mask);
115 tmpsxloc += 2*sxinc;
116 }
117
118 for (; dst < dst_end; dst++) {
119 *dst++ = 0xff000000 | (src[tmpsxloc >> shift] >> 8);
120 tmpsxloc += sxinc;
121 }
122
123 PTR_ADD(dstBase, dstScan);
124 syloc += syinc;
125 }
126 }
127
128 /***************************************************************/
129
ADD_SUFF(IntArgbToIntRgbxConvert)130 void ADD_SUFF(IntArgbToIntRgbxConvert)(BLIT_PARAMS)
131 {
132 mlib_s32 dstScan = pDstInfo->scanStride;
133 mlib_s32 srcScan = pSrcInfo->scanStride;
134 mlib_d64 dd, mask;
135 mlib_s32 i, i0, j;
136
137 if (dstScan == 4*width && srcScan == 4*width) {
138 width *= height;
139 height = 1;
140 }
141
142 mask = vis_to_double_dup(0xFFFFFF00);
143 vis_alignaddr(NULL, 1);
144
145 for (j = 0; j < height; j++) {
146 mlib_u32 *src = srcBase;
147 mlib_u32 *dst = dstBase;
148
149 i = i0 = 0;
150
151 if ((mlib_s32)dst & 7) {
152 dst[i] = src[i] << 8;
153 i0 = 1;
154 }
155
156 #pragma pipeloop(0)
157 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
158 dd = vis_freg_pair(((mlib_f32*)src)[i], ((mlib_f32*)src)[i + 1]);
159 dd = vis_faligndata(dd, dd);
160 *(mlib_d64*)(dst + i) = vis_fand(dd, mask);
161 }
162
163 if (i < width) {
164 dst[i] = src[i] << 8;
165 }
166
167 PTR_ADD(dstBase, dstScan);
168 PTR_ADD(srcBase, srcScan);
169 }
170 }
171
172 /***************************************************************/
173
ADD_SUFF(IntArgbToIntRgbxScaleConvert)174 void ADD_SUFF(IntArgbToIntRgbxScaleConvert)(SCALE_PARAMS)
175 {
176 mlib_s32 dstScan = pDstInfo->scanStride;
177 mlib_s32 srcScan = pSrcInfo->scanStride;
178 mlib_d64 dd, mask;
179 mlib_s32 j;
180
181 mask = vis_to_double_dup(0xFFFFFF00);
182 vis_alignaddr(NULL, 1);
183
184 for (j = 0; j < height; j++) {
185 mlib_u32 *src = srcBase;
186 mlib_u32 *dst = dstBase;
187 mlib_u32 *dst_end = dst + width;
188 mlib_s32 tmpsxloc = sxloc;
189
190 PTR_ADD(src, (syloc >> shift) * srcScan);
191
192 if ((mlib_s32)dst & 7) {
193 *dst++ = src[tmpsxloc >> shift] << 8;
194 tmpsxloc += sxinc;
195 }
196
197 #pragma pipeloop(0)
198 for (; dst <= dst_end - 2; dst += 2) {
199 dd = vis_freg_pair(((mlib_f32*)src)[tmpsxloc >> shift],
200 ((mlib_f32*)src)[(tmpsxloc + sxinc) >> shift]);
201 dd = vis_faligndata(dd, dd);
202 *(mlib_d64*)dst = vis_fand(dd, mask);
203 tmpsxloc += 2*sxinc;
204 }
205
206 for (; dst < dst_end; dst++) {
207 *dst++ = src[tmpsxloc >> shift] << 8;
208 tmpsxloc += sxinc;
209 }
210
211 PTR_ADD(dstBase, dstScan);
212 syloc += syinc;
213 }
214 }
215
216 /***************************************************************/
217
/*
 * Statement-block macro: repack 24 source bytes (sd0, sd1, sd2 =
 * 8 three-byte pixels) into four 64-bit results dd0..dd3 (8 four-byte
 * pixels).
 *
 * Names taken from the enclosing scope:
 *   in      sd0, sd1, sd2   the 24 source bytes
 *   in      s_3             bytes merged in as the fourth channel
 *   scratch s_0, s_1, s_2   per-channel intermediates (overwritten)
 *   out     dd0..dd3        the repacked pixels
 *
 * It is built purely from vis_fpmerge steps: three rounds gather the
 * bytes into s_2, s_1 and s_0 (presumably one colour channel each for the
 * 8 pixels), then two rounds interleave s_0, s_1, s_2 and s_3 again.
 * The macro expands to a brace block and is used without a trailing
 * semicolon.
 */
#define BGR_TO_RGBx {                                            \
    mlib_d64 t_a, t_b, t_c, t_d, t_e, t_f;                       \
    mlib_d64 hi13, lo13, hi02, lo02;                             \
                                                                 \
    /* round 1 */                                                \
    t_a = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));       \
    t_b = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));       \
    t_c = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));       \
                                                                 \
    /* round 2 */                                                \
    t_d = vis_fpmerge(vis_read_hi(t_a), vis_read_lo(t_b));       \
    t_e = vis_fpmerge(vis_read_lo(t_a), vis_read_hi(t_c));       \
    t_f = vis_fpmerge(vis_read_hi(t_b), vis_read_lo(t_c));       \
                                                                 \
    /* round 3: per-channel vectors */                           \
    s_2 = vis_fpmerge(vis_read_hi(t_d), vis_read_lo(t_e));       \
    s_1 = vis_fpmerge(vis_read_lo(t_d), vis_read_hi(t_f));       \
    s_0 = vis_fpmerge(vis_read_hi(t_e), vis_read_lo(t_f));       \
                                                                 \
    /* pair up channels 1/3 and 0/2 */                           \
    hi13 = vis_fpmerge(vis_read_hi(s_1), vis_read_hi(s_3));      \
    lo13 = vis_fpmerge(vis_read_lo(s_1), vis_read_lo(s_3));      \
    hi02 = vis_fpmerge(vis_read_hi(s_0), vis_read_hi(s_2));      \
    lo02 = vis_fpmerge(vis_read_lo(s_0), vis_read_lo(s_2));      \
                                                                 \
    /* final interleave: two output pixels per result */         \
    dd0 = vis_fpmerge(vis_read_hi(hi02), vis_read_hi(hi13));     \
    dd1 = vis_fpmerge(vis_read_lo(hi02), vis_read_lo(hi13));     \
    dd2 = vis_fpmerge(vis_read_hi(lo02), vis_read_hi(lo13));     \
    dd3 = vis_fpmerge(vis_read_lo(lo02), vis_read_lo(lo13));     \
}
244
245 /***************************************************************/
246
ADD_SUFF(ThreeByteBgrToIntRgbxConvert)247 void ADD_SUFF(ThreeByteBgrToIntRgbxConvert)(BLIT_PARAMS)
248 {
249 mlib_s32 dstScan = pDstInfo->scanStride;
250 mlib_s32 srcScan = pSrcInfo->scanStride;
251 mlib_d64 *sp;
252 mlib_d64 s_0, s_1, s_2, s_3;
253 mlib_d64 s0, s1, s2, s3, sd0, sd1, sd2, dd0, dd1, dd2, dd3;
254 mlib_s32 i, i0, j;
255
256 if (width < 16) {
257 for (j = 0; j < height; j++) {
258 mlib_u8 *src = srcBase;
259 mlib_u32 *dst = dstBase;
260
261 for (i = 0; i < width; i++) {
262 dst[i] = READ_RGBx(i);
263 }
264
265 PTR_ADD(dstBase, dstScan);
266 PTR_ADD(srcBase, srcScan);
267 }
268 return;
269 }
270
271 if (srcScan == 3*width && dstScan == 4*width) {
272 width *= height;
273 height = 1;
274 }
275
276 s_3 = vis_fzero();
277
278 for (j = 0; j < height; j++) {
279 mlib_u8 *src = srcBase;
280 mlib_f32 *dst = dstBase;
281
282 i = i0 = 0;
283
284 if ((mlib_s32)dst & 7) {
285 ((mlib_s32*)dst)[i] = READ_RGBx(i);
286 i0 = 1;
287 }
288
289 sp = vis_alignaddr(src, 3*i0);
290 s3 = *sp++;
291
292 #pragma pipeloop(0)
293 for (i = i0; i <= (mlib_s32)width - 8; i += 8) {
294 s0 = s3;
295 s1 = *sp++;
296 s2 = *sp++;
297 s3 = *sp++;
298 sd0 = vis_faligndata(s0, s1);
299 sd1 = vis_faligndata(s1, s2);
300 sd2 = vis_faligndata(s2, s3);
301
302 BGR_TO_RGBx
303
304 *(mlib_d64*)(dst + i ) = dd0;
305 *(mlib_d64*)(dst + i + 2) = dd1;
306 *(mlib_d64*)(dst + i + 4) = dd2;
307 *(mlib_d64*)(dst + i + 6) = dd3;
308 }
309
310 for (; i < width; i++) {
311 ((mlib_s32*)dst)[i] = READ_RGBx(i);
312 }
313
314 PTR_ADD(dstBase, dstScan);
315 PTR_ADD(srcBase, srcScan);
316 }
317 }
318
319 /***************************************************************/
320
ADD_SUFF(ThreeByteBgrToIntRgbxScaleConvert)321 void ADD_SUFF(ThreeByteBgrToIntRgbxScaleConvert)(SCALE_PARAMS)
322 {
323 mlib_s32 dstScan = pDstInfo->scanStride;
324 mlib_s32 srcScan = pSrcInfo->scanStride;
325 mlib_d64 dd, dzero;
326 mlib_s32 i, i0, i1, j;
327
328 if (width < 16) {
329 for (j = 0; j < height; j++) {
330 mlib_u8 *src = srcBase;
331 mlib_s32 *dst = dstBase;
332 mlib_s32 *dst_end = dst + width;
333 mlib_s32 tmpsxloc = sxloc;
334
335 PTR_ADD(src, (syloc >> shift) * srcScan);
336
337 for (; dst < dst_end; dst++) {
338 i = tmpsxloc >> shift;
339 tmpsxloc += sxinc;
340 *(mlib_s32*)dst = READ_RGBx(i);
341 }
342
343 PTR_ADD(dstBase, dstScan);
344 syloc += syinc;
345 }
346 return;
347 }
348
349 dzero = vis_fzero();
350
351 vis_alignaddr(NULL, 7);
352
353 for (j = 0; j < height; j++) {
354 mlib_u8 *src = srcBase;
355 mlib_f32 *dst = dstBase;
356 mlib_f32 *dst_end = dst + width;
357 mlib_s32 tmpsxloc = sxloc;
358
359 PTR_ADD(src, (syloc >> shift) * srcScan);
360
361 if ((mlib_s32)dst & 7) {
362 i = tmpsxloc >> shift;
363 tmpsxloc += sxinc;
364 *(mlib_s32*)dst = READ_RGBx(i);
365 dst++;
366 }
367
368 #pragma pipeloop(0)
369 for (; dst <= dst_end - 2; dst += 2) {
370 i0 = tmpsxloc >> shift;
371 i1 = (tmpsxloc + sxinc) >> shift;
372 tmpsxloc += 2*sxinc;
373
374 dd = vis_faligndata(vis_ld_u8(src + 3*i1 ), dzero);
375 dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 1), dd);
376 dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 2), dd);
377 dd = vis_faligndata(dzero, dd);
378 dd = vis_faligndata(vis_ld_u8(src + 3*i0 ), dd);
379 dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 1), dd);
380 dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 2), dd);
381
382 *(mlib_d64*)dst = dd;
383 }
384
385 for (; dst < dst_end; dst++) {
386 i = tmpsxloc >> shift;
387 tmpsxloc += sxinc;
388 *(mlib_s32*)dst = READ_RGBx(i);
389 }
390
391 PTR_ADD(dstBase, dstScan);
392 syloc += syinc;
393 }
394 }
395
396 /***************************************************************/
397
ADD_SUFF(ByteGrayToIntRgbxConvert)398 void ADD_SUFF(ByteGrayToIntRgbxConvert)(BLIT_PARAMS)
399 {
400 mlib_s32 dstScan = pDstInfo->scanStride;
401 mlib_s32 srcScan = pSrcInfo->scanStride;
402 mlib_d64 d0, d1, d2, d3;
403 mlib_f32 ff, aa = vis_fzero();
404 mlib_s32 i, j, x;
405
406 if (width < 8) {
407 for (j = 0; j < height; j++) {
408 mlib_u8 *src = srcBase;
409 mlib_s32 *dst = dstBase;
410
411 for (i = 0; i < width; i++) {
412 x = src[i];
413 dst[i] = Gray2RGBx(x);
414 }
415
416 PTR_ADD(dstBase, dstScan);
417 PTR_ADD(srcBase, srcScan);
418 }
419 return;
420 }
421
422 if (srcScan == width && dstScan == 4*width) {
423 width *= height;
424 height = 1;
425 }
426
427 for (j = 0; j < height; j++) {
428 mlib_u8 *src = srcBase;
429 mlib_s32 *dst = dstBase;
430 mlib_s32 *dst_end;
431
432 dst_end = dst + width;
433
434 while (((mlib_s32)src & 3) && dst < dst_end) {
435 x = *src++;
436 *dst++ = Gray2RGBx(x);
437 }
438
439 #pragma pipeloop(0)
440 for (; dst <= (dst_end - 4); dst += 4) {
441 ff = *(mlib_f32*)src;
442 d0 = vis_fpmerge(ff, ff);
443 d1 = vis_fpmerge(ff, aa);
444 d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
445 d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
446 ((mlib_f32*)dst)[0] = vis_read_hi(d2);
447 ((mlib_f32*)dst)[1] = vis_read_lo(d2);
448 ((mlib_f32*)dst)[2] = vis_read_hi(d3);
449 ((mlib_f32*)dst)[3] = vis_read_lo(d3);
450 src += 4;
451 }
452
453 while (dst < dst_end) {
454 x = *src++;
455 *dst++ = Gray2RGBx(x);
456 }
457
458 PTR_ADD(dstBase, dstScan);
459 PTR_ADD(srcBase, srcScan);
460 }
461 }
462
463 /***************************************************************/
464
ADD_SUFF(ByteGrayToIntRgbxScaleConvert)465 void ADD_SUFF(ByteGrayToIntRgbxScaleConvert)(SCALE_PARAMS)
466 {
467 mlib_s32 dstScan = pDstInfo->scanStride;
468 mlib_s32 srcScan = pSrcInfo->scanStride;
469 mlib_d64 d0, d1, d2, d3, dd;
470 mlib_f32 ff, aa = vis_fzero();
471 mlib_s32 i, j, x;
472
473 if (width < 16) {
474 for (j = 0; j < height; j++) {
475 mlib_u8 *src = srcBase;
476 mlib_s32 *dst = dstBase;
477 mlib_s32 tmpsxloc = sxloc;
478
479 PTR_ADD(src, (syloc >> shift) * srcScan);
480
481 for (i = 0; i < width; i++) {
482 x = src[tmpsxloc >> shift];
483 tmpsxloc += sxinc;
484 dst[i] = Gray2RGBx(x);
485 }
486
487 PTR_ADD(dstBase, dstScan);
488 syloc += syinc;
489 }
490 return;
491 }
492
493 vis_alignaddr(NULL, 7);
494
495 for (j = 0; j < height; j++) {
496 mlib_u8 *src = srcBase;
497 mlib_s32 *dst = dstBase;
498 mlib_s32 *dst_end;
499 mlib_s32 tmpsxloc = sxloc;
500
501 PTR_ADD(src, (syloc >> shift) * srcScan);
502
503 dst_end = dst + width;
504
505 #pragma pipeloop(0)
506 for (; dst <= (dst_end - 4); dst += 4) {
507 LOAD_NEXT_U8(dd, src + ((tmpsxloc + 3*sxinc) >> shift));
508 LOAD_NEXT_U8(dd, src + ((tmpsxloc + 2*sxinc) >> shift));
509 LOAD_NEXT_U8(dd, src + ((tmpsxloc + sxinc) >> shift));
510 LOAD_NEXT_U8(dd, src + ((tmpsxloc ) >> shift));
511 tmpsxloc += 4*sxinc;
512 ff = vis_read_hi(dd);
513 d0 = vis_fpmerge(ff, ff);
514 d1 = vis_fpmerge(ff, aa);
515 d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
516 d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
517 ((mlib_f32*)dst)[0] = vis_read_hi(d2);
518 ((mlib_f32*)dst)[1] = vis_read_lo(d2);
519 ((mlib_f32*)dst)[2] = vis_read_hi(d3);
520 ((mlib_f32*)dst)[3] = vis_read_lo(d3);
521 }
522
523 while (dst < dst_end) {
524 x = src[tmpsxloc >> shift];
525 tmpsxloc += sxinc;
526 *dst++ = Gray2RGBx(x);
527 }
528
529 PTR_ADD(dstBase, dstScan);
530 syloc += syinc;
531 }
532 }
533
534 /***************************************************************/
535
ADD_SUFF(IntArgbBmToIntRgbxXparOver)536 void ADD_SUFF(IntArgbBmToIntRgbxXparOver)(BLIT_PARAMS)
537 {
538 mlib_s32 dstScan = pDstInfo->scanStride;
539 mlib_s32 srcScan = pSrcInfo->scanStride;
540 mlib_d64 dd, maskRGBx;
541 mlib_s32 i, i0, j, mask;
542
543 if (dstScan == 4*width && srcScan == 4*width) {
544 width *= height;
545 height = 1;
546 }
547
548 vis_alignaddr(NULL, 1);
549 maskRGBx = vis_to_double_dup(0xFFFFFF00);
550
551 for (j = 0; j < height; j++) {
552 mlib_s32 *src = srcBase;
553 mlib_s32 *dst = dstBase;
554
555 i = i0 = 0;
556
557 if ((mlib_s32)dst & 7) {
558 if (*(mlib_u8*)(src + i)) {
559 dst[i] = src[i] << 8;
560 }
561 i0 = 1;
562 }
563
564 #pragma pipeloop(0)
565 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
566 dd = vis_freg_pair(((mlib_s32*)src)[i], ((mlib_s32*)src)[i + 1]);
567 dd = vis_fand(vis_faligndata(dd, dd), maskRGBx);
568 mask = (((-*(mlib_u8*)(src + i)) >> 31) & 2) |
569 (((-*(mlib_u8*)(src + i + 1)) >> 31) & 1);
570 vis_pst_32(dd, dst + i, mask);
571 }
572
573 if (i < width) {
574 if (*(mlib_u8*)(src + i)) {
575 dst[i] = src[i] << 8;
576 }
577 }
578
579 PTR_ADD(dstBase, dstScan);
580 PTR_ADD(srcBase, srcScan);
581 }
582 }
583
584 /***************************************************************/
585
ADD_SUFF(IntArgbBmToIntRgbxScaleXparOver)586 void ADD_SUFF(IntArgbBmToIntRgbxScaleXparOver)(SCALE_PARAMS)
587 {
588 mlib_s32 dstScan = pDstInfo->scanStride;
589 mlib_s32 srcScan = pSrcInfo->scanStride;
590 mlib_d64 dd, maskRGBx;
591 mlib_s32 j, mask;
592
593 vis_alignaddr(NULL, 1);
594 maskRGBx = vis_to_double_dup(0xFFFFFF00);
595
596 for (j = 0; j < height; j++) {
597 mlib_s32 *src = srcBase;
598 mlib_s32 *dst = dstBase;
599 mlib_s32 *dst_end = dst + width;
600 mlib_s32 tmpsxloc = sxloc;
601
602 PTR_ADD(src, (syloc >> shift) * srcScan);
603
604 if ((mlib_s32)dst & 7) {
605 mlib_s32 *pp = src + (tmpsxloc >> shift);
606 if (*(mlib_u8*)pp) {
607 *dst = *pp << 8;
608 }
609 dst++;
610 tmpsxloc += sxinc;
611 }
612
613 #pragma pipeloop(0)
614 for (; dst <= dst_end - 2; dst += 2) {
615 mlib_s32 *pp0 = src + (tmpsxloc >> shift);
616 mlib_s32 *pp1 = src + ((tmpsxloc + sxinc) >> shift);
617 dd = vis_freg_pair(*(mlib_f32*)pp0, *(mlib_f32*)pp1);
618 dd = vis_fand(vis_faligndata(dd, dd), maskRGBx);
619 mask = (((-*(mlib_u8*)pp0) >> 31) & 2) |
620 ((mlib_u32)(-*(mlib_u8*)pp1) >> 31);
621 vis_pst_32(dd, dst, mask);
622 tmpsxloc += 2*sxinc;
623 }
624
625 for (; dst < dst_end; dst++) {
626 mlib_s32 *pp = src + (tmpsxloc >> shift);
627 if (*(mlib_u8*)pp) {
628 *dst = *pp << 8;
629 }
630 }
631
632 PTR_ADD(dstBase, dstScan);
633 syloc += syinc;
634 }
635 }
636
637 /***************************************************************/
638
ADD_SUFF(IntArgbBmToIntRgbxXparBgCopy)639 void ADD_SUFF(IntArgbBmToIntRgbxXparBgCopy)(BCOPY_PARAMS)
640 {
641 mlib_s32 dstScan = pDstInfo->scanStride;
642 mlib_s32 srcScan = pSrcInfo->scanStride;
643 mlib_d64 dd, d_bgpixel, maskRGBx;
644 mlib_s32 i, i0, j, mask;
645
646 if (dstScan == 4*width && srcScan == 4*width) {
647 width *= height;
648 height = 1;
649 }
650
651 vis_alignaddr(NULL, 1);
652 d_bgpixel = vis_to_double_dup(bgpixel);
653 maskRGBx = vis_to_double_dup(0xFFFFFF00);
654
655 for (j = 0; j < height; j++) {
656 mlib_s32 *src = srcBase;
657 mlib_s32 *dst = dstBase;
658
659 i = i0 = 0;
660
661 if ((mlib_s32)dst & 7) {
662 if (*(mlib_u8*)(src + i)) {
663 dst[i] = src[i] << 8;
664 } else {
665 dst[i] = bgpixel;
666 }
667 i0 = 1;
668 }
669
670 #pragma pipeloop(0)
671 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
672 dd = vis_freg_pair(((mlib_s32*)src)[i], ((mlib_s32*)src)[i + 1]);
673 dd = vis_fand(vis_faligndata(dd, dd), maskRGBx);
674 mask = (((-*(mlib_u8*)(src + i)) >> 31) & 2) |
675 (((-*(mlib_u8*)(src + i + 1)) >> 31) & 1);
676 *(mlib_d64*)(dst + i) = d_bgpixel;
677 vis_pst_32(dd, dst + i, mask);
678 }
679
680 if (i < width) {
681 if (*(mlib_u8*)(src + i)) {
682 dst[i] = src[i] << 8;
683 } else {
684 dst[i] = bgpixel;
685 }
686 }
687
688 PTR_ADD(dstBase, dstScan);
689 PTR_ADD(srcBase, srcScan);
690 }
691 }
692
693 /***************************************************************/
694
ADD_SUFF(ByteIndexedToIntRgbxConvert)695 void ADD_SUFF(ByteIndexedToIntRgbxConvert)(BLIT_PARAMS)
696 {
697 jint *pixLut = pSrcInfo->lutBase;
698 mlib_s32 dstScan = pDstInfo->scanStride;
699 mlib_s32 srcScan = pSrcInfo->scanStride;
700 mlib_d64 dd, maskRGBx;
701 mlib_s32 i, i0, j;
702
703 if (srcScan == width && dstScan == 4*width) {
704 width *= height;
705 height = 1;
706 }
707
708 vis_alignaddr(NULL, 1);
709 maskRGBx = vis_to_double_dup(0xFFFFFF00);
710
711 for (j = 0; j < height; j++) {
712 mlib_u8 *src = srcBase;
713 mlib_s32 *dst = dstBase;
714
715 i = i0 = 0;
716
717 if ((mlib_s32)dst & 7) {
718 dst[i] = pixLut[src[i]] << 8;
719 i0 = 1;
720 }
721
722 #pragma pipeloop(0)
723 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
724 dd = vis_freg_pair(((mlib_f32*)pixLut)[src[i]],
725 ((mlib_f32*)pixLut)[src[i + 1]]);
726 dd = vis_fand(vis_faligndata(dd, dd), maskRGBx);
727 *(mlib_d64*)(dst + i) = dd;
728 }
729
730 for (; i < width; i++) {
731 dst[i] = pixLut[src[i]] << 8;
732 }
733
734 PTR_ADD(dstBase, dstScan);
735 PTR_ADD(srcBase, srcScan);
736 }
737 }
738
739 /***************************************************************/
740
ADD_SUFF(ByteIndexedToIntRgbxScaleConvert)741 void ADD_SUFF(ByteIndexedToIntRgbxScaleConvert)(SCALE_PARAMS)
742 {
743 jint *pixLut = pSrcInfo->lutBase;
744 mlib_s32 dstScan = pDstInfo->scanStride;
745 mlib_s32 srcScan = pSrcInfo->scanStride;
746 mlib_d64 dd, maskRGBx;
747 mlib_s32 j;
748
749 vis_alignaddr(NULL, 1);
750 maskRGBx = vis_to_double_dup(0xFFFFFF00);
751
752 for (j = 0; j < height; j++) {
753 mlib_u8 *src = srcBase;
754 mlib_s32 *dst = dstBase;
755 mlib_s32 *dst_end = dst + width;
756 mlib_s32 tmpsxloc = sxloc;
757
758 PTR_ADD(src, (syloc >> shift) * srcScan);
759
760 if ((mlib_s32)dst & 7) {
761 *dst++ = pixLut[src[tmpsxloc >> shift]] << 8;
762 tmpsxloc += sxinc;
763 }
764
765 #pragma pipeloop(0)
766 for (; dst <= dst_end - 2; dst += 2) {
767 dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
768 src[(tmpsxloc + sxinc) >> shift]);
769 dd = vis_fand(vis_faligndata(dd, dd), maskRGBx);
770 *(mlib_d64*)dst = dd;
771 tmpsxloc += 2*sxinc;
772 }
773
774 for (; dst < dst_end; dst++) {
775 *dst = pixLut[src[tmpsxloc >> shift]] << 8;
776 tmpsxloc += sxinc;
777 }
778
779 PTR_ADD(dstBase, dstScan);
780 syloc += syinc;
781 }
782 }
783
784 /***************************************************************/
785
ADD_SUFF(ByteIndexedBmToIntRgbxXparOver)786 void ADD_SUFF(ByteIndexedBmToIntRgbxXparOver)(BLIT_PARAMS)
787 {
788 jint *pixLut = pSrcInfo->lutBase;
789 mlib_s32 dstScan = pDstInfo->scanStride;
790 mlib_s32 srcScan = pSrcInfo->scanStride;
791 mlib_d64 dd, maskRGBx;
792 mlib_s32 i, i0, j, x, mask;
793
794 if (srcScan == width && dstScan == 4*width) {
795 width *= height;
796 height = 1;
797 }
798
799 vis_alignaddr(NULL, 1);
800 maskRGBx = vis_to_double_dup(0xFFFFFF00);
801
802 for (j = 0; j < height; j++) {
803 mlib_u8 *src = srcBase;
804 mlib_s32 *dst = dstBase;
805
806 i = i0 = 0;
807
808 if ((mlib_s32)dst & 7) {
809 x = pixLut[src[i]];
810 if (x < 0) {
811 dst[i] = x << 8;
812 }
813 i0 = 1;
814 }
815
816 #pragma pipeloop(0)
817 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
818 mlib_f32 *pp0 = (mlib_f32*)pixLut + src[i];
819 mlib_f32 *pp1 = (mlib_f32*)pixLut + src[i + 1];
820 dd = vis_freg_pair(*pp0, *pp1);
821 mask = (((*(mlib_u8*)pp0) >> 6) & 2) | ((*(mlib_u8*)pp1) >> 7);
822 dd = vis_fand(vis_faligndata(dd, dd), maskRGBx);
823 vis_pst_32(dd, dst + i, mask);
824 }
825
826 for (; i < width; i++) {
827 x = pixLut[src[i]];
828 if (x < 0) {
829 dst[i] = x << 8;
830 }
831 }
832
833 PTR_ADD(dstBase, dstScan);
834 PTR_ADD(srcBase, srcScan);
835 }
836 }
837
838 /***************************************************************/
839
ADD_SUFF(ByteIndexedBmToIntRgbxScaleXparOver)840 void ADD_SUFF(ByteIndexedBmToIntRgbxScaleXparOver)(SCALE_PARAMS)
841 {
842 jint *pixLut = pSrcInfo->lutBase;
843 mlib_s32 dstScan = pDstInfo->scanStride;
844 mlib_s32 srcScan = pSrcInfo->scanStride;
845 mlib_d64 dd, maskRGBx;
846 mlib_s32 j, x, mask;
847
848 vis_alignaddr(NULL, 1);
849 maskRGBx = vis_to_double_dup(0xFFFFFF00);
850
851 for (j = 0; j < height; j++) {
852 mlib_u8 *src = srcBase;
853 mlib_s32 *dst = dstBase;
854 mlib_s32 *dst_end = dst + width;
855 mlib_s32 tmpsxloc = sxloc;
856
857 PTR_ADD(src, (syloc >> shift) * srcScan);
858
859 if ((mlib_s32)dst & 7) {
860 x = pixLut[src[tmpsxloc >> shift]];
861 tmpsxloc += sxinc;
862 if (x < 0) {
863 *dst = x << 8;
864 }
865 dst++;
866 }
867
868 #pragma pipeloop(0)
869 for (; dst <= dst_end - 2; dst += 2) {
870 mlib_f32 *p0 = (mlib_f32*)pixLut + src[tmpsxloc >> shift];
871 mlib_f32 *p1 = (mlib_f32*)pixLut + src[(tmpsxloc + sxinc) >> shift];
872 dd = vis_freg_pair(*p0, *p1);
873 mask = (((*(mlib_u8*)p0) >> 6) & 2) | ((*(mlib_u8*)p1) >> 7);
874 dd = vis_fand(vis_faligndata(dd, dd), maskRGBx);
875 tmpsxloc += 2*sxinc;
876 vis_pst_32(dd, dst, mask);
877 }
878
879 for (; dst < dst_end; dst++) {
880 x = pixLut[src[tmpsxloc >> shift]];
881 tmpsxloc += sxinc;
882 if (x < 0) {
883 *dst = x << 8;
884 }
885 }
886
887 PTR_ADD(dstBase, dstScan);
888 syloc += syinc;
889 }
890 }
891
892 /***************************************************************/
893
ADD_SUFF(ByteIndexedBmToIntRgbxXparBgCopy)894 void ADD_SUFF(ByteIndexedBmToIntRgbxXparBgCopy)(BCOPY_PARAMS)
895 {
896 jint *pixLut = pSrcInfo->lutBase;
897 mlib_s32 dstScan = pDstInfo->scanStride;
898 mlib_s32 srcScan = pSrcInfo->scanStride;
899 mlib_d64 dd, d_bgpixel, maskRGBx;
900 mlib_s32 j, x, mask;
901
902 if (srcScan == width && dstScan == 4*width) {
903 width *= height;
904 height = 1;
905 }
906
907 vis_alignaddr(NULL, 1);
908 maskRGBx = vis_to_double_dup(0xFFFFFF00);
909 d_bgpixel = vis_to_double_dup(bgpixel);
910
911 for (j = 0; j < height; j++) {
912 mlib_u8 *src = srcBase;
913 mlib_s32 *dst = dstBase;
914 mlib_s32 *dst_end;
915
916 dst_end = dst + width;
917
918 if ((mlib_s32)dst & 7) {
919 x = pixLut[*src++];
920 if (x < 0) {
921 *dst = x << 8;
922 } else {
923 *dst = bgpixel;
924 }
925 dst++;
926 }
927
928 #pragma pipeloop(0)
929 for (; dst <= (dst_end - 2); dst += 2) {
930 mlib_f32 *pp0 = (mlib_f32*)pixLut + src[0];
931 mlib_f32 *pp1 = (mlib_f32*)pixLut + src[1];
932 dd = vis_freg_pair(*pp0, *pp1);
933 mask = (((*(mlib_u8*)pp0) >> 6) & 2) | ((*(mlib_u8*)pp1) >> 7);
934 dd = vis_fand(vis_faligndata(dd, dd), maskRGBx);
935 *(mlib_d64*)dst = d_bgpixel;
936 vis_pst_32(dd, dst, mask);
937 src += 2;
938 }
939
940 while (dst < dst_end) {
941 x = pixLut[*src++];
942 if (x < 0) {
943 *dst = x << 8;
944 } else {
945 *dst = bgpixel;
946 }
947 dst++;
948 }
949
950 PTR_ADD(dstBase, dstScan);
951 PTR_ADD(srcBase, srcScan);
952 }
953 }
954
955 /***************************************************************/
956
ADD_SUFF(IntRgbxDrawGlyphListAA)957 void ADD_SUFF(IntRgbxDrawGlyphListAA)(GLYPH_LIST_PARAMS)
958 {
959 mlib_s32 glyphCounter;
960 mlib_s32 scan = pRasInfo->scanStride;
961 mlib_u8 *dstBase;
962 mlib_s32 j;
963 mlib_d64 dmix0, dmix1, dd, d0, d1, e0, e1, fgpixel_d;
964 mlib_d64 done, done16, d_half, maskRGB, dzero;
965 mlib_s32 pix, mask, mask_z;
966 mlib_f32 srcG_f;
967
968 done = vis_to_double_dup(0x7fff7fff);
969 done16 = vis_to_double_dup(0x7fff);
970 d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));
971
972 fgpixel_d = vis_to_double_dup(fgpixel);
973 srcG_f = vis_to_float(argbcolor << 8);
974 maskRGB = vis_to_double_dup(0xffffff00);
975 dzero = vis_fzero();
976
977 vis_write_gsr(0 << 3);
978
979 for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {
980 const jubyte *pixels;
981 unsigned int rowBytes;
982 int left, top;
983 int width, height;
984 int right, bottom;
985
986 pixels = (const jubyte *) glyphs[glyphCounter].pixels;
987
988 if (!pixels) continue;
989
990 left = glyphs[glyphCounter].x;
991 top = glyphs[glyphCounter].y;
992 width = glyphs[glyphCounter].width;
993 height = glyphs[glyphCounter].height;
994 rowBytes = width;
995 right = left + width;
996 bottom = top + height;
997 if (left < clipLeft) {
998 pixels += clipLeft - left;
999 left = clipLeft;
1000 }
1001 if (top < clipTop) {
1002 pixels += (clipTop - top) * rowBytes;
1003 top = clipTop;
1004 }
1005 if (right > clipRight) {
1006 right = clipRight;
1007 }
1008 if (bottom > clipBottom) {
1009 bottom = clipBottom;
1010 }
1011 if (right <= left || bottom <= top) {
1012 continue;
1013 }
1014 width = right - left;
1015 height = bottom - top;
1016
1017 dstBase = pRasInfo->rasBase;
1018 PTR_ADD(dstBase, top*scan + 4*left);
1019
1020 for (j = 0; j < height; j++) {
1021 mlib_u8 *src = (void*)pixels;
1022 mlib_s32 *dst, *dst_end;
1023
1024 dst = (void*)dstBase;
1025 dst_end = dst + width;
1026
1027 if ((mlib_s32)dst & 7) {
1028 pix = *src++;
1029 if (pix) {
1030 dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1031 dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1032 *(mlib_f32*)dst = vis_fands(vis_fpack16(dd),
1033 vis_read_hi(maskRGB));
1034 if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
1035 }
1036 dst++;
1037 }
1038
1039 #pragma pipeloop(0)
1040 for (; dst <= (dst_end - 2); dst += 2) {
1041 dmix0 = vis_freg_pair(((mlib_f32 *)vis_mul8s_tbl)[src[0]],
1042 ((mlib_f32 *)vis_mul8s_tbl)[src[1]]);
1043 mask = vis_fcmplt32(dmix0, done16);
1044 mask_z = vis_fcmpne32(dmix0, dzero);
1045 dmix1 = vis_fpsub16(done, dmix0);
1046 src += 2;
1047
1048 dd = *(mlib_d64*)dst;
1049 d0 = vis_fmul8x16al(srcG_f, vis_read_hi(dmix0));
1050 d1 = vis_fmul8x16al(srcG_f, vis_read_lo(dmix0));
1051 e0 = vis_fmul8x16al(vis_read_hi(dd), vis_read_hi(dmix1));
1052 e1 = vis_fmul8x16al(vis_read_lo(dd), vis_read_lo(dmix1));
1053 d0 = vis_fpadd16(vis_fpadd16(d0, d_half), e0);
1054 d1 = vis_fpadd16(vis_fpadd16(d1, d_half), e1);
1055 dd = vis_fpack16_pair(d0, d1);
1056 dd = vis_fand(dd, maskRGB);
1057
1058 vis_pst_32(fgpixel_d, dst, mask_z);
1059 vis_pst_32(dd, dst, mask & mask_z);
1060 }
1061
1062 while (dst < dst_end) {
1063 pix = *src++;
1064 if (pix) {
1065 dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1066 dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1067 *(mlib_f32*)dst = vis_fands(vis_fpack16(dd),
1068 vis_read_hi(maskRGB));
1069 if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
1070 }
1071 dst++;
1072 }
1073
1074 PTR_ADD(dstBase, scan);
1075 pixels += rowBytes;
1076 }
1077 }
1078 }
1079
1080 /***************************************************************/
1081
1082 #endif /* JAVA2D_NO_MLIB */
1083