1 /*
2 * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 #include <stdlib.h>
27 #include "jni_util.h"
28 #include "math.h"
29
30 #include "GraphicsPrimitiveMgr.h"
31 #include "Region.h"
32
33 #include "sun_java2d_loops_TransformHelper.h"
34 #include "java_awt_image_AffineTransformOp.h"
35
36 /*
37 * The stub functions replace the bilinear and bicubic interpolation
38 * functions with NOP versions so that the performance of the helper
39 * functions that fetch the data can be more directly tested. They
40 * are not compiled or enabled by default. Change the following
41 * #undef to a #define to build the stub functions.
42 *
43 * When compiled, they are enabled by the environment variable TXSTUB.
44 * When compiled, there is also code to disable the VIS versions and
45 * use the C versions in this file in their place by defining the TXNOVIS
46 * environment variable.
47 */
#undef MAKE_STUBS

/*
 * The number of IntArgbPre samples to store in the temporary buffer.
 * The transform entry point derives its per-call pixel budget from this
 * value: LINE_SIZE pixels for nearest neighbor, LINE_SIZE / 4 for bilinear
 * and LINE_SIZE / 16 for bicubic.
 */
#define LINE_SIZE       2048

/*
 * The number of scanlines whose edge pairs fit in the stack allocated
 * edge buffer, which is declared as 2 + MAXEDGES * 2 jints (see below).
 */
#define MAXEDGES 1024

/* Declare the software interpolation functions. */
static TransformInterpFunc BilinearInterp;
static TransformInterpFunc BicubicInterp;

#ifdef MAKE_STUBS
/* Optionally declare the stub interpolation functions. */
static TransformInterpFunc BilinearInterpStub;
static TransformInterpFunc BicubicInterpStub;
#endif /* MAKE_STUBS */

/*
 * Initially choose the software interpolation functions.
 * These choices can be overridden by platform code that runs during the
 * primitive registration phase of initialization by storing pointers to
 * better functions in these pointers.
 * Compiling the stubs also turns on code below that can re-install the
 * software functions or stub functions on the first call to this primitive.
 *
 * Note that these two pointers are not static, so they are visible to
 * other translation units.
 */
TransformInterpFunc *pBilinearFunc = BilinearInterp;
TransformInterpFunc *pBicubicFunc = BicubicInterp;
76
77 /*
78 * The dxydxy parameters of the inverse transform determine how
79 * quickly we step through the source image. For tiny scale
80 * factors (on the order of 1E-16 or so) the stepping distances
81 * are huge. The image has been scaled so small that stepping
82 * a single pixel in device space moves the sampling point by
83 * billions (or more) pixels in the source image space. These
84 * huge stepping values can overflow the whole part of the longs
85 * we use for the fixed point stepping equations and so we need
86 * a more robust solution. We could simply iterate over every
87 * device pixel, use the inverse transform to transform it back
88 * into the source image coordinate system and then test it for
89 * being in range and sample pixel-by-pixel, but that is quite
90 * a bit more expensive. Fortunately, if the scale factors are
91 * so tiny that we overflow our long values then the number of
92 * pixels we are planning to visit should be very tiny. The only
93 * exception to that rule is if the scale factor along one
94 * dimension is tiny (creating the huge stepping values), and
95 * the scale factor along the other dimension is fairly regular
96 * or an up-scale. In that case we have a lot of pixels along
97 * the direction of the larger axis to sample, but few along the
98 * smaller axis. Though, pessimally, with an added shear factor
99 * such a linearly tiny image could have bounds that cover a large
100 * number of pixels. Such odd transformations should be very
101 * rare and the absolute limit on calculations would involve a
102 * single reverse transform of every pixel in the output image
103 * which is not fast, but it should not cause an undue stall
104 * of the rendering software.
105 *
106 * The specific test we will use is to calculate the inverse
107 * transformed values of every corner of the destination bounds
108 * (in order to be user-clip independent) and if we can
109 * perform a fixed-point-long inverse transform of all of
110 * those points without overflowing we will use the fast
111 * fixed point algorithm. Otherwise we will use the safe
112 * per-pixel transform algorithm.
113 * The 4 corners are 0,0, 0,dsth, dstw,0, dstw,dsth
114 * Transformed they are:
115 * tx, ty
116 * tx +dxdy*H, ty +dydy*H
117 * tx+dxdx*W, ty+dydx*W
118 * tx+dxdx*W+dxdy*H, ty+dydx*W+dydy*H
119 */
/*
 * We reject coordinates not less than 1<<30 so that the distance between
 * any 2 of them is less than 1<<31 which would overflow into the sign
 * bit of a signed long value used to represent fixed point coordinates.
 *
 * The test is written as "not strictly inside the safe range" rather than
 * "at or beyond the limit" so that a NaN coordinate, for which every
 * ordered comparison is false, is also reported as unsafe instead of
 * being passed on to the fixed point conversion.
 */
#define TX_FIXED_UNSAFE(v)      (!(fabs(v) < (1<<30)))
124 static jboolean
checkOverflow(jint dxoff,jint dyoff,SurfaceDataBounds * pBounds,TransformInfo * pItxInfo,jdouble * retx,jdouble * rety)125 checkOverflow(jint dxoff, jint dyoff,
126 SurfaceDataBounds *pBounds,
127 TransformInfo *pItxInfo,
128 jdouble *retx, jdouble *rety)
129 {
130 jdouble x, y;
131
132 x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
133 y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
134 Transform_transform(pItxInfo, &x, &y);
135 *retx = x;
136 *rety = y;
137 if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
138 return JNI_TRUE;
139 }
140
141 x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
142 y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
143 Transform_transform(pItxInfo, &x, &y);
144 if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
145 return JNI_TRUE;
146 }
147
148 x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
149 y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
150 Transform_transform(pItxInfo, &x, &y);
151 if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
152 return JNI_TRUE;
153 }
154
155 x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
156 y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
157 Transform_transform(pItxInfo, &x, &y);
158 if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
159 return JNI_TRUE;
160 }
161
162 return JNI_FALSE;
163 }
164
165 /*
166 * Fill the edge buffer with pairs of coordinates representing the maximum
167 * left and right pixels of the destination surface that should be processed
168 * on each scanline, clipped to the bounds parameter.
169 * The number of scanlines to calculate is implied by the bounds parameter.
170 * Only pixels that map back through the specified (inverse) transform to a
171 * source coordinate that falls within the (0, 0, sw, sh) bounds of the
172 * source image should be processed.
173 * pEdges points to an array of jints that holds 2 + numedges*2 values where
174 * numedges should match (pBounds->y2 - pBounds->y1).
175 * The first two jints in pEdges should be set to y1 and y2 and every pair
176 * of jints after that represent the xmin,xmax of all pixels in range of
177 * the transformed blit for the corresponding scanline.
178 */
179 static void
calculateEdges(jint * pEdges,SurfaceDataBounds * pBounds,TransformInfo * pItxInfo,jlong xbase,jlong ybase,juint sw,juint sh)180 calculateEdges(jint *pEdges,
181 SurfaceDataBounds *pBounds,
182 TransformInfo *pItxInfo,
183 jlong xbase, jlong ybase,
184 juint sw, juint sh)
185 {
186 jlong dxdxlong, dydxlong;
187 jlong dxdylong, dydylong;
188 jlong drowxlong, drowylong;
189 jint dx1, dy1, dx2, dy2;
190
191 dxdxlong = DblToLong(pItxInfo->dxdx);
192 dydxlong = DblToLong(pItxInfo->dydx);
193 dxdylong = DblToLong(pItxInfo->dxdy);
194 dydylong = DblToLong(pItxInfo->dydy);
195
196 dx1 = pBounds->x1;
197 dy1 = pBounds->y1;
198 dx2 = pBounds->x2;
199 dy2 = pBounds->y2;
200 *pEdges++ = dy1;
201 *pEdges++ = dy2;
202
203 drowxlong = (dx2-dx1-1) * dxdxlong;
204 drowylong = (dx2-dx1-1) * dydxlong;
205
206 while (dy1 < dy2) {
207 jlong xlong, ylong;
208
209 dx1 = pBounds->x1;
210 dx2 = pBounds->x2;
211
212 xlong = xbase;
213 ylong = ybase;
214 while (dx1 < dx2 &&
215 (((juint) WholeOfLong(ylong)) >= sh ||
216 ((juint) WholeOfLong(xlong)) >= sw))
217 {
218 dx1++;
219 xlong += dxdxlong;
220 ylong += dydxlong;
221 }
222
223 xlong = xbase + drowxlong;
224 ylong = ybase + drowylong;
225 while (dx2 > dx1 &&
226 (((juint) WholeOfLong(ylong)) >= sh ||
227 ((juint) WholeOfLong(xlong)) >= sw))
228 {
229 dx2--;
230 xlong -= dxdxlong;
231 ylong -= dydxlong;
232 }
233
234 *pEdges++ = dx1;
235 *pEdges++ = dx2;
236
237 /* Increment to next scanline */
238 xbase += dxdylong;
239 ybase += dydylong;
240 dy1++;
241 }
242 }
243
/*
 * Forward declaration of the per-pixel fallback used by the transform
 * entry point when checkOverflow() reports that fixed point stepping
 * is not safe.  The definition appears later in this file.
 */
static void
Transform_SafeHelper(JNIEnv *env,
                     SurfaceDataOps *srcOps,
                     SurfaceDataOps *dstOps,
                     SurfaceDataRasInfo *pSrcInfo,
                     SurfaceDataRasInfo *pDstInfo,
                     NativePrimitive *pMaskBlitPrim,
                     CompositeInfo *pCompInfo,
                     TransformHelperFunc *pHelperFunc,
                     TransformInterpFunc *pInterpFunc,
                     RegionData *pClipInfo, TransformInfo *pItxInfo,
                     jint *pData, jint *pEdges,
                     jint dxoff, jint dyoff, jint sw, jint sh);
257
/*
 * Class:     sun_java2d_loops_TransformHelper
 * Method:    Transform
 * Signature: (Lsun/java2d/loops/MaskBlit;Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;Ljava/awt/geom/AffineTransform;IIIIIIIII[III)V
 *
 * Renders the source rectangle (sx1,sy1)-(sx2,sy2) of srcData into the
 * destination rectangle (dx1,dy1)-(dx2,dy2) of dstData, sampling the
 * source through the inverse transform itxform.
 *
 *   maskblit    primitive used to store/composite the fetched pixels
 *   comp, clip  composite and clip region applied to the destination
 *   txtype      one of the AffineTransformOp TYPE_* interpolation
 *               constants; any other value makes the call a no-op
 *   edgeArray   optional int array that receives the y range followed by
 *               one (xmin, xmax) pair per destination scanline; when it is
 *               null a private stack or heap buffer is used instead
 *   dxoff,dyoff offsets added to destination pixel coordinates before
 *               they are run through the inverse transform
 *
 * Every early exit taken after a surface has been locked unlocks it again
 * before returning.
 */
JNIEXPORT void JNICALL
Java_sun_java2d_loops_TransformHelper_Transform
    (JNIEnv *env, jobject self,
     jobject maskblit,
     jobject srcData, jobject dstData,
     jobject comp, jobject clip,
     jobject itxform, jint txtype,
     jint sx1, jint sy1, jint sx2, jint sy2,
     jint dx1, jint dy1, jint dx2, jint dy2,
     jintArray edgeArray, jint dxoff, jint dyoff)
{
    SurfaceDataOps *srcOps;
    SurfaceDataOps *dstOps;
    SurfaceDataRasInfo srcInfo;
    SurfaceDataRasInfo dstInfo;
    NativePrimitive *pHelperPrim;
    NativePrimitive *pMaskBlitPrim;
    CompositeInfo compInfo;
    RegionData clipInfo;
    TransformInfo itxInfo;
    jint maxlinepix;
    TransformHelperFunc *pHelperFunc;
    TransformInterpFunc *pInterpFunc;
    jdouble xorig, yorig;
    jlong numedges;
    jint *pEdges;
    /* Stack storage for the edge list when it is small enough. */
    jint edgebuf[2 + MAXEDGES * 2];
    /* Intermediate pixel buffer; the jlong member only affects alignment. */
    union {
        jlong align;
        jint data[LINE_SIZE];
    } rgb;

#ifdef MAKE_STUBS
    static int th_initialized;

    /* For debugging only - used to swap in alternate funcs for perf testing */
    if (!th_initialized) {
        if (getenv("TXSTUB") != 0) {
            pBilinearFunc = BilinearInterpStub;
            pBicubicFunc = BicubicInterpStub;
        } else if (getenv("TXNOVIS") != 0) {
            pBilinearFunc = BilinearInterp;
            pBicubicFunc = BicubicInterp;
        }
        th_initialized = 1;
    }
#endif /* MAKE_STUBS */

    /* Resolve the native primitives; nothing is locked yet on these exits. */
    pHelperPrim = GetNativePrim(env, self);
    if (pHelperPrim == NULL) {
        /* Should never happen... */
        return;
    }
    pMaskBlitPrim = GetNativePrim(env, maskblit);
    if (pMaskBlitPrim == NULL) {
        /* Exception was thrown by GetNativePrim */
        return;
    }
    /* compInfo is only filled in when the composite type provides a getter. */
    if (pMaskBlitPrim->pCompType->getCompInfo != NULL) {
        (*pMaskBlitPrim->pCompType->getCompInfo)(env, &compInfo, comp);
    }
    if (Region_GetInfo(env, clip, &clipInfo)) {
        return;
    }

    srcOps = SurfaceData_GetOps(env, srcData);
    if (srcOps == 0) {
        return;
    }
    dstOps = SurfaceData_GetOps(env, dstData);
    if (dstOps == 0) {
        return;
    }

    /*
     * Grab the appropriate pointer to the helper and interpolation
     * routines and calculate the maximum number of destination pixels
     * that can be processed in one intermediate buffer based on the
     * size of the buffer and the number of samples needed per pixel.
     */
    switch (txtype) {
    case java_awt_image_AffineTransformOp_TYPE_NEAREST_NEIGHBOR:
        pHelperFunc = pHelperPrim->funcs.transformhelpers->nnHelper;
        pInterpFunc = NULL;
        maxlinepix = LINE_SIZE;
        break;
    case java_awt_image_AffineTransformOp_TYPE_BILINEAR:
        pHelperFunc = pHelperPrim->funcs.transformhelpers->blHelper;
        pInterpFunc = pBilinearFunc;
        maxlinepix = LINE_SIZE / 4;
        break;
    case java_awt_image_AffineTransformOp_TYPE_BICUBIC:
        pHelperFunc = pHelperPrim->funcs.transformhelpers->bcHelper;
        pInterpFunc = pBicubicFunc;
        maxlinepix = LINE_SIZE / 16;
        break;
    default:
        // Should not happen, but just in case.
        return;
    }

    /* Lock the source first, then the clipped destination. */
    srcInfo.bounds.x1 = sx1;
    srcInfo.bounds.y1 = sy1;
    srcInfo.bounds.x2 = sx2;
    srcInfo.bounds.y2 = sy2;
    dstInfo.bounds.x1 = dx1;
    dstInfo.bounds.y1 = dy1;
    dstInfo.bounds.x2 = dx2;
    dstInfo.bounds.y2 = dy2;
    SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
    if (srcOps->Lock(env, srcOps, &srcInfo, pHelperPrim->srcflags)
        != SD_SUCCESS)
    {
        /* edgeArray should already contain zeros for min/maxy */
        return;
    }
    if (dstOps->Lock(env, dstOps, &dstInfo, pMaskBlitPrim->dstflags)
        != SD_SUCCESS)
    {
        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
        /* edgeArray should already contain zeros for min/maxy */
        return;
    }
    Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
    Transform_GetInfo(env, itxform, &itxInfo);

    /*
     * Choose where the edge list lives: the caller's array if one was
     * supplied and is large enough, otherwise the stack buffer for up to
     * MAXEDGES scanlines, otherwise a heap allocation.  pEdges is left
     * NULL when there are no scanlines or no usable storage.
     */
    numedges = (((jlong) dstInfo.bounds.y2) - ((jlong) dstInfo.bounds.y1));
    if (numedges <= 0) {
        pEdges = NULL;
    } else if (!JNU_IsNull(env, edgeArray)) {
        /*
         * Ideally Java should allocate an array large enough, but if
         * we ever have a miscommunication about the number of edge
         * lines, or if the Java array calculation should overflow to
         * a positive number and succeed in allocating an array that
         * is too small, we need to verify that it can still hold the
         * number of integers that we plan to store to be safe.
         */
        jsize edgesize = (*env)->GetArrayLength(env, edgeArray);
        /* (edgesize/2 - 1) should avoid any overflow or underflow. */
        pEdges = (((edgesize / 2) - 1) >= numedges)
            ? (*env)->GetPrimitiveArrayCritical(env, edgeArray, NULL)
            : NULL;
    } else if (numedges > MAXEDGES) {
        /* numedges variable (jlong) can be at most ((1<<32)-1) */
        /* memsize can overflow a jint, but not a jlong */
        jlong memsize = ((numedges * 2) + 2) * sizeof(*pEdges);
        /* Only allocate if the size survives the round trip through size_t */
        pEdges = (memsize == ((size_t) memsize))
            ? malloc((size_t) memsize)
            : NULL;
    } else {
        pEdges = edgebuf;
    }

    if (pEdges == NULL) {
        /* An empty scanline range is not an error, so it is not reported. */
        if (!(*env)->ExceptionCheck(env) && numedges > 0) {
            JNU_ThrowInternalError(env, "Unable to allocate edge list");
        }
        SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
        /* edgeArray should already contain zeros for min/maxy */
        return;
    }


    if (!Region_IsEmpty(&clipInfo)) {
        srcOps->GetRasInfo(env, srcOps, &srcInfo);
        dstOps->GetRasInfo(env, dstOps, &dstInfo);
        if (srcInfo.rasBase == NULL || dstInfo.rasBase == NULL) {
            /* No raster to work on: report an empty y range. */
            pEdges[0] = pEdges[1] = 0;
        } else if (checkOverflow(dxoff, dyoff, &dstInfo.bounds,
                                 &itxInfo, &xorig, &yorig))
        {
            /* Fixed point stepping could overflow: transform per pixel. */
            Transform_SafeHelper(env, srcOps, dstOps,
                                 &srcInfo, &dstInfo,
                                 pMaskBlitPrim, &compInfo,
                                 pHelperFunc, pInterpFunc,
                                 &clipInfo, &itxInfo, rgb.data, pEdges,
                                 dxoff, dyoff, sx2-sx1, sy2-sy1);
        } else {
            SurfaceDataBounds span;
            jlong dxdxlong, dydxlong;
            jlong dxdylong, dydylong;
            jlong xbase, ybase;

            /* Fixed point steps and the source origin from checkOverflow. */
            dxdxlong = DblToLong(itxInfo.dxdx);
            dydxlong = DblToLong(itxInfo.dydx);
            dxdylong = DblToLong(itxInfo.dxdy);
            dydylong = DblToLong(itxInfo.dydy);
            xbase = DblToLong(xorig);
            ybase = DblToLong(yorig);

            calculateEdges(pEdges, &dstInfo.bounds, &itxInfo,
                           xbase, ybase, sx2-sx1, sy2-sy1);

            Region_StartIteration(env, &clipInfo);
            while (Region_NextIteration(&clipInfo, &span)) {
                jlong rowxlong, rowylong;
                void *pDst;

                /* The dx/dy parameters are reused as loop cursors here. */
                dy1 = span.y1;
                dy2 = span.y2;
                /* Source coordinate of the first column of scanline dy1 */
                rowxlong = xbase + (dy1 - dstInfo.bounds.y1) * dxdylong;
                rowylong = ybase + (dy1 - dstInfo.bounds.y1) * dydylong;

                while (dy1 < dy2) {
                    jlong xlong, ylong;

                    /* Note - process at most one scanline at a time. */

                    /* Edge pair for this scanline, clipped to the span */
                    dx1 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 2];
                    dx2 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 3];
                    if (dx1 < span.x1) dx1 = span.x1;
                    if (dx2 > span.x2) dx2 = span.x2;

                    /* All pixels from dx1 to dx2 have centers in bounds */
                    while (dx1 < dx2) {
                        /* Can process at most one buffer full at a time */
                        jint numpix = dx2 - dx1;
                        if (numpix > maxlinepix) {
                            numpix = maxlinepix;
                        }

                        xlong =
                            rowxlong + ((dx1 - dstInfo.bounds.x1) * dxdxlong);
                        ylong =
                            rowylong + ((dx1 - dstInfo.bounds.x1) * dydxlong);

                        /* Get IntArgbPre pixel data from source */
                        (*pHelperFunc)(&srcInfo,
                                       rgb.data, numpix,
                                       xlong, dxdxlong,
                                       ylong, dydxlong);

                        /* Interpolate result pixels if needed */
                        if (pInterpFunc) {
                            (*pInterpFunc)(rgb.data, numpix,
                                           FractOfLong(xlong-LongOneHalf),
                                           FractOfLong(dxdxlong),
                                           FractOfLong(ylong-LongOneHalf),
                                           FractOfLong(dydxlong));
                        }

                        /* Store/Composite interpolated pixels into dest */
                        pDst = PtrCoord(dstInfo.rasBase,
                                        dx1, dstInfo.pixelStride,
                                        dy1, dstInfo.scanStride);
                        (*pMaskBlitPrim->funcs.maskblit)(pDst, rgb.data,
                                                         0, 0, 0,
                                                         numpix, 1,
                                                         &dstInfo, &srcInfo,
                                                         pMaskBlitPrim,
                                                         &compInfo);

                        /* Increment to next buffer worth of input pixels */
                        dx1 += maxlinepix;
                    }

                    /* Increment to next scanline */
                    rowxlong += dxdylong;
                    rowylong += dydylong;
                    dy1++;
                }
            }
            Region_EndIteration(env, &clipInfo);
        }
        SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
        SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
    } else {
        /* Nothing survives the clip: report an empty y range. */
        pEdges[0] = pEdges[1] = 0;
    }

    /* Give back whichever edge storage was chosen above. */
    if (!JNU_IsNull(env, edgeArray)) {
        (*env)->ReleasePrimitiveArrayCritical(env, edgeArray, pEdges, 0);
    } else if (pEdges != edgebuf) {
        free(pEdges);
    }
    SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
    SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
}
543
544 static void
Transform_SafeHelper(JNIEnv * env,SurfaceDataOps * srcOps,SurfaceDataOps * dstOps,SurfaceDataRasInfo * pSrcInfo,SurfaceDataRasInfo * pDstInfo,NativePrimitive * pMaskBlitPrim,CompositeInfo * pCompInfo,TransformHelperFunc * pHelperFunc,TransformInterpFunc * pInterpFunc,RegionData * pClipInfo,TransformInfo * pItxInfo,jint * pData,jint * pEdges,jint dxoff,jint dyoff,jint sw,jint sh)545 Transform_SafeHelper(JNIEnv *env,
546 SurfaceDataOps *srcOps,
547 SurfaceDataOps *dstOps,
548 SurfaceDataRasInfo *pSrcInfo,
549 SurfaceDataRasInfo *pDstInfo,
550 NativePrimitive *pMaskBlitPrim,
551 CompositeInfo *pCompInfo,
552 TransformHelperFunc *pHelperFunc,
553 TransformInterpFunc *pInterpFunc,
554 RegionData *pClipInfo, TransformInfo *pItxInfo,
555 jint *pData, jint *pEdges,
556 jint dxoff, jint dyoff, jint sw, jint sh)
557 {
558 SurfaceDataBounds span;
559 jint dx1, dx2;
560 jint dy1, dy2;
561 jint i, iy;
562
563 dy1 = pDstInfo->bounds.y1;
564 dy2 = pDstInfo->bounds.y2;
565 dx1 = pDstInfo->bounds.x1;
566 dx2 = pDstInfo->bounds.x2;
567 pEdges[0] = dy1;
568 pEdges[1] = dy2;
569 for (iy = dy1; iy < dy2; iy++) {
570 jint i = (iy - dy1) * 2;
571 /* row spans are set to max,min until we find a pixel in range below */
572 pEdges[i + 2] = dx2;
573 pEdges[i + 3] = dx1;
574 }
575
576 Region_StartIteration(env, pClipInfo);
577 while (Region_NextIteration(pClipInfo, &span)) {
578 dy1 = span.y1;
579 dy2 = span.y2;
580 while (dy1 < dy2) {
581 dx1 = span.x1;
582 dx2 = span.x2;
583 i = (dy1 - pDstInfo->bounds.y1) * 2;
584 while (dx1 < dx2) {
585 jdouble x, y;
586 jlong xlong, ylong;
587
588 x = dxoff + dx1 + 0.5;
589 y = dyoff + dy1 + 0.5;
590 Transform_transform(pItxInfo, &x, &y);
591 xlong = DblToLong(x);
592 ylong = DblToLong(y);
593
594 /* Process only pixels with centers in bounds
595 * Test double values to avoid overflow in conversion
596 * to long values and then also test the long values
597 * in case they rounded up and out of bounds during
598 * the conversion.
599 */
600 if (x >= 0 && y >= 0 && x < sw && y < sh &&
601 WholeOfLong(xlong) < sw &&
602 WholeOfLong(ylong) < sh)
603 {
604 void *pDst;
605
606 if (pEdges[i + 2] > dx1) {
607 pEdges[i + 2] = dx1;
608 }
609 if (pEdges[i + 3] <= dx1) {
610 pEdges[i + 3] = dx1 + 1;
611 }
612
613 /* Get IntArgbPre pixel data from source */
614 (*pHelperFunc)(pSrcInfo,
615 pData, 1,
616 xlong, 0,
617 ylong, 0);
618
619 /* Interpolate result pixels if needed */
620 if (pInterpFunc) {
621 (*pInterpFunc)(pData, 1,
622 FractOfLong(xlong-LongOneHalf), 0,
623 FractOfLong(ylong-LongOneHalf), 0);
624 }
625
626 /* Store/Composite interpolated pixels into dest */
627 pDst = PtrCoord(pDstInfo->rasBase,
628 dx1, pDstInfo->pixelStride,
629 dy1, pDstInfo->scanStride);
630 (*pMaskBlitPrim->funcs.maskblit)(pDst, pData,
631 0, 0, 0,
632 1, 1,
633 pDstInfo, pSrcInfo,
634 pMaskBlitPrim,
635 pCompInfo);
636 }
637
638 /* Increment to next input pixel */
639 dx1++;
640 }
641
642 /* Increment to next scanline */
643 dy1++;
644 }
645 }
646 Region_EndIteration(env, pClipInfo);
647 }
648
649 #define BL_INTERP_V1_to_V2_by_F(v1, v2, f) \
650 (((v1)<<8) + ((v2)-(v1))*(f))
651
652 #define BL_ACCUM(comp) \
653 do { \
654 jint c1 = ((jubyte *) pRGB)[comp]; \
655 jint c2 = ((jubyte *) pRGB)[comp+4]; \
656 jint cR = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
657 c1 = ((jubyte *) pRGB)[comp+8]; \
658 c2 = ((jubyte *) pRGB)[comp+12]; \
659 c2 = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
660 cR = BL_INTERP_V1_to_V2_by_F(cR, c2, yfactor); \
661 ((jubyte *)pRes)[comp] = (jubyte) ((cR + (1<<15)) >> 16); \
662 } while (0)
663
664 static void
BilinearInterp(jint * pRGB,jint numpix,jint xfract,jint dxfract,jint yfract,jint dyfract)665 BilinearInterp(jint *pRGB, jint numpix,
666 jint xfract, jint dxfract,
667 jint yfract, jint dyfract)
668 {
669 jint j;
670 jint *pRes = pRGB;
671
672 for (j = 0; j < numpix; j++) {
673 jint xfactor;
674 jint yfactor;
675 xfactor = URShift(xfract, 32-8);
676 yfactor = URShift(yfract, 32-8);
677 BL_ACCUM(0);
678 BL_ACCUM(1);
679 BL_ACCUM(2);
680 BL_ACCUM(3);
681 pRes++;
682 pRGB += 4;
683 xfract += dxfract;
684 yfract += dyfract;
685 }
686 }
687
/*
 * Branch free clamp of the integer lvalue 'val' to the range [0, max].
 * Relies on (val >> 31) producing all one bits for a negative value and
 * all zero bits otherwise.
 */
#define SAT(val, max) \
    do { \
        (val) &= ~((val) >> 31);  /* negatives become 0 */ \
        (val) -= (max);           /* only overflows are now positive */ \
        (val) &= ((val) >> 31);   /* positives become 0 */ \
        (val) += (max);           /* range is now [0 -> max] */ \
    } while (0)
695
/* For x86, integer multiplies are faster than floating point */
/* Note that on x86 Linux the choice of best algorithm varies
 * depending on the compiler optimization and the processor type.
 * Currently, the sun/awt x86 Linux builds are not optimized so
 * all the variations produce mediocre performance.
 * For now we will use the choice that works best for the Windows
 * build until the (lack of) optimization issues on Linux are resolved.
 */
#define BICUBIC_USE_INT_MATH

/*
 * Each variant below supplies the same set of macros:
 *   BC_TYPE          type of the coefficients and accumulators
 *   BC_DblToCoeff(v) convert a double weight to a coefficient
 *   BC_COEFF_ONE     the coefficient value that represents 1.0
 *   BC_V_HALF        rounding bias the accumulators are seeded with
 *   BC_CompToV(v)    convert a color component to BC_TYPE
 *   BC_STORE_COMPS   clamp the accumulators (alpha to [0,255], colors to
 *                    [0,alpha]) and pack them into *pRes
 *
 * The alpha component is shifted as an unsigned value when packing:
 * alpha can be as large as 255 and shifting a signed 255 left by 24
 * is not representable in a signed 32-bit int.
 */

#ifdef BICUBIC_USE_DBL_CAST

#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ((jdouble) (v))
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *(pRes) = (jint) (((juint) a << 24) | (r << 16) | (g << 8) | (b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_CAST */

#ifdef BICUBIC_USE_DBL_LUT

#define ItoD1(v)    ((jdouble) (v))
#define ItoD4(v)    ItoD1(v),  ItoD1((v)+1),   ItoD1((v)+2),   ItoD1((v)+3)
#define ItoD16(v)   ItoD4(v),  ItoD4((v)+4),   ItoD4((v)+8),   ItoD4((v)+12)
#define ItoD64(v)   ItoD16(v), ItoD16((v)+16), ItoD16((v)+32), ItoD16((v)+48)

/* Lookup table mapping each component value 0..255 to a jdouble. */
static jdouble ItoD_table[] = {
    ItoD64(0), ItoD64(64), ItoD64(128), ItoD64(192)
};

#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ItoD_table[v]
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *(pRes) = (jint) (((juint) a << 24) | (r << 16) | (g << 8) | (b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_LUT */

#ifdef BICUBIC_USE_INT_MATH

/* Coefficients carry 8 fraction bits, so products of two carry 16. */
#define BC_DblToCoeff(v)        ((jint) ((v) * 256))
#define BC_COEFF_ONE            256
#define BC_TYPE                 jint
#define BC_V_HALF               (1 << 15)
#define BC_CompToV(v)           ((jint) (v))
#define BC_STORE_COMPS(pRes) \
    do { \
        accumA >>= 16; \
        accumR >>= 16; \
        accumG >>= 16; \
        accumB >>= 16; \
        SAT(accumA, 255); \
        SAT(accumR, accumA); \
        SAT(accumG, accumA); \
        SAT(accumB, accumA); \
        *(pRes) = (jint) (((juint) accumA << 24) | (accumR << 16) | \
                          (accumG << 8) | (accumB)); \
    } while (0)

#endif /* BICUBIC_USE_INT_MATH */
780
/*
 * Add the contribution of sample pRGB[index] to the four accumulators.
 * The sample is weighted by the product of the table entries at xcindex
 * and ycindex.  Uses pRGB, bicubic_coeff and accumA/R/G/B from the
 * surrounding code.
 */
#define BC_ACCUM(index, ycindex, xcindex) \
    do { \
        const int argb = pRGB[index]; \
        const BC_TYPE weight = \
            bicubic_coeff[xcindex] * bicubic_coeff[ycindex]; \
        accumA += BC_CompToV((argb >> 24) & 0xff) * weight; \
        accumR += BC_CompToV((argb >> 16) & 0xff) * weight; \
        accumG += BC_CompToV((argb >>  8) & 0xff) * weight; \
        accumB += BC_CompToV((argb >>  0) & 0xff) * weight; \
    } while (0)
791
792 static BC_TYPE bicubic_coeff[513];
793 static jboolean bicubictableinited;
794
795 static void
init_bicubic_table(jdouble A)796 init_bicubic_table(jdouble A)
797 {
798 /*
799 * The following formulas are designed to give smooth
800 * results when 'A' is -0.5 or -1.0.
801 */
802 int i;
803 for (i = 0; i < 256; i++) {
804 /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */
805 jdouble x = i / 256.0;
806 x = ((A+2)*x - (A+3))*x*x + 1;
807 bicubic_coeff[i] = BC_DblToCoeff(x);
808 }
809
810 for (; i < 384; i++) {
811 /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */
812 jdouble x = i / 256.0;
813 x = ((A*x - 5*A)*x + 8*A)*x - 4*A;
814 bicubic_coeff[i] = BC_DblToCoeff(x);
815 }
816
817 bicubic_coeff[384] = (BC_COEFF_ONE - bicubic_coeff[128]*2) / 2;
818
819 for (i++; i <= 512; i++) {
820 bicubic_coeff[i] = BC_COEFF_ONE - (bicubic_coeff[512-i] +
821 bicubic_coeff[i-256] +
822 bicubic_coeff[768-i]);
823 }
824
825 bicubictableinited = JNI_TRUE;
826 }
827
828 static void
BicubicInterp(jint * pRGB,jint numpix,jint xfract,jint dxfract,jint yfract,jint dyfract)829 BicubicInterp(jint *pRGB, jint numpix,
830 jint xfract, jint dxfract,
831 jint yfract, jint dyfract)
832 {
833 jint i;
834 jint *pRes = pRGB;
835
836 if (!bicubictableinited) {
837 init_bicubic_table(-0.5);
838 }
839
840 for (i = 0; i < numpix; i++) {
841 BC_TYPE accumA, accumR, accumG, accumB;
842 jint xfactor, yfactor;
843
844 xfactor = URShift(xfract, 32-8);
845 yfactor = URShift(yfract, 32-8);
846 accumA = accumR = accumG = accumB = BC_V_HALF;
847 BC_ACCUM(0, yfactor+256, xfactor+256);
848 BC_ACCUM(1, yfactor+256, xfactor+ 0);
849 BC_ACCUM(2, yfactor+256, 256-xfactor);
850 BC_ACCUM(3, yfactor+256, 512-xfactor);
851 BC_ACCUM(4, yfactor+ 0, xfactor+256);
852 BC_ACCUM(5, yfactor+ 0, xfactor+ 0);
853 BC_ACCUM(6, yfactor+ 0, 256-xfactor);
854 BC_ACCUM(7, yfactor+ 0, 512-xfactor);
855 BC_ACCUM(8, 256-yfactor, xfactor+256);
856 BC_ACCUM(9, 256-yfactor, xfactor+ 0);
857 BC_ACCUM(10, 256-yfactor, 256-xfactor);
858 BC_ACCUM(11, 256-yfactor, 512-xfactor);
859 BC_ACCUM(12, 512-yfactor, xfactor+256);
860 BC_ACCUM(13, 512-yfactor, xfactor+ 0);
861 BC_ACCUM(14, 512-yfactor, 256-xfactor);
862 BC_ACCUM(15, 512-yfactor, 512-xfactor);
863 BC_STORE_COMPS(pRes);
864 pRes++;
865 pRGB += 16;
866 xfract += dxfract;
867 yfract += dyfract;
868 }
869 }
870
871 #ifdef MAKE_STUBS
872
873 static void
BilinearInterpStub(jint * pRGBbase,jint numpix,jint xfract,jint dxfract,jint yfract,jint dyfract)874 BilinearInterpStub(jint *pRGBbase, jint numpix,
875 jint xfract, jint dxfract,
876 jint yfract, jint dyfract)
877 {
878 jint *pRGB = pRGBbase;
879 while (--numpix >= 0) {
880 *pRGBbase = *pRGB;
881 pRGBbase += 1;
882 pRGB += 4;
883 }
884 }
885
886 static void
BicubicInterpStub(jint * pRGBbase,jint numpix,jint xfract,jint dxfract,jint yfract,jint dyfract)887 BicubicInterpStub(jint *pRGBbase, jint numpix,
888 jint xfract, jint dxfract,
889 jint yfract, jint dyfract)
890 {
891 jint *pRGB = pRGBbase+5;
892 while (--numpix >= 0) {
893 *pRGBbase = *pRGB;
894 pRGBbase += 1;
895 pRGB += 16;
896 }
897 }
898
899 #endif /* MAKE_STUBS */
900