1 /*
2  * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 #include <math.h>
27 
28 #include "jni_util.h"
29 #include "GraphicsPrimitiveMgr.h"
30 #include "Region.h"
31 
32 #include "sun_java2d_loops_ScaledBlit.h"
33 
34 /*
35  * The scaling loops used inside the helper functions are based on the
36  * following pseudocode for stepping through the source image:
37  *
38  * shift - number of bits of sub-pixel precision in scaled values
39  * srcxorig, srcyorig - scaled location of first pixel
40  * srcxinc, srcyinc - scaled x and y increments
41  * dstwidth, dstheight - number of pixels to process across and down
42  *
43  * 1. srcy = srcyorig;
44  * 2. for (dstheight) {
45  * 3.     srcx = srcxorig;
46  * 4.     for (dstwidth) {
47  * 5.         fetch and process pixel for (srcx >> shift, srcy >> shift)
48  * 6.         srcx += srcxinc;
49  * 7.     }
50  * 8.     srcy += srcyinc;
51  * 9. }
52  *
53  * Note that each execution of line 6 or 8 accumulates error of
54  * +/- 1 into the scaled coordinate variables.  These lines are
55  * each executed once per pixel across or once per pixel down
56  * the region being iterated over, thus the error can accumulate
57  * up to a magnitude of dstwidth in the horizontal direction and
58  * dstheight in the vertical direction.
59  *
60  * If the error ever reaches a magnitude of (1 << shift) then we
61  * will be off by at least 1 source pixel in our mapping.
62  *
63  * Note that we increment the source coordinates by the srcxinc
64  * and srcyinc variables in each step.  Thus, if our error ever
65  * accumulates to a magnitude equal to srcxinc or srcyinc then
66  * we will be ahead or behind of "where we should be" by at least
67  * one iteration.  Since each iteration is a destination pixel,
68  * this means that our actual location will be off by at least
69  * one destination pixel.
70  *
71  * This means that all of the values:
72  *
73  *     - (1 << shift)
74  *     - srcxinc
75  *     - srcyinc
76  *
77  * all represent a maximum bound on how much error we can accumulate
78  * before we are off by a source or a destination pixel.  Thus,
79  * we should make sure that we never process more than that many
80  * pixels if we want to maintain single pixel accuracy.  Even
81  * better would be to process many fewer pixels than those bounds
82  * to ensure that our accumulated error is much smaller than a
83  * pixel.
84  */
85 
86 /*
87  * Find and return the largest tile size that is a power of 2 and
88  * which is small enough to yield some reassuring degree of subpixel
89  * accuracy.  The degree of subpixel accuracy that will be preserved
90  * by the tile size it chooses will vary and the details on how
91  * it makes this decision are detailed in the comments below.
92  */
93 static jint
findpow2tilesize(jint shift,jint sxinc,jint syinc)94 findpow2tilesize(jint shift, jint sxinc, jint syinc)
95 {
96     /*
97      * The initial value of shift is our first estimate for
98      * the power of 2 for our tilesize since it ensures
99      * less than 1 source pixel of error.
100      *
101      * Reducing it until (1 << shift) is not larger than the
102      * smallest of our increments ensures we will have no more
103      * than 1 destination pixel of error as well.
104      */
105     if (sxinc > syinc) {
106         sxinc = syinc;
107     }
108     if (sxinc == 0) {
109         /* Degenerate case will cause infinite loop in next loop... */
110         return 1;
111     }
112     while ((1 << shift) > sxinc) {
113         shift--;
114     }
115     /*
116      * shift is now the largest it can be for less than 1 pixel
117      * of error in either source or destination spaces.
118      *
119      * Now we will try for at least 8 bits of subpixel accuracy
120      * with a tile size of at least 256x256 and reduce our subpixel
121      * accuracy on a sliding scale down to a tilesize of 1x1 when
122      * we have no bits of sub-pixel accuracy.
123      */
124     if (shift >= 16) {
125         /* Subtracting 8 asks for 8 bits of subpixel accuracy. */
126         shift -= 8;
127     } else {
128         /* Ask for half of the remaining bits to be subpixel accuracy. */
129         /* Rounding is in favor of subpixel accuracy over tile size. */
130         /* Worst case, shift == 0 and tilesize == (1 << 0) == 1 */
131         shift /= 2;
132     }
133     return (1 << shift);
134 }
135 
136 /*
137  * For a given integer destination pixel coordinate "id", calculate the
138  * integer destination coordinate of the start of the "ts" sized tile
139  * in which it resides.
140  * Tiles all start at even multiples of the tile size from the integer
141  * destination origin "io".
142  *
143  * id == integer destination coordinate
144  * io == integer destination operation origin
145  * ts == tilesize (must be power of 2)
146  */
147 #define TILESTART(id, io, ts)   ((io) + (((id)-(io)) & (~((ts)-1))))
148 
149 /*
150  * For a given integer destination pixel coordinate "id", calculate the
151  * sub-pixel accurate source coordinate from which its sample comes.
152  * The returned source coordinate is expressed in a shifted fractional
153  * arithmetic number system.
154  *
155  * id == integer destination coordinate
156  * fo == floating point destination operation origin,
157  * sf == source coordinate scale factor per destination pixel
158  *       (multiplied by fractional arithmetic "shift")
159  *
160  * The caller is required to cast this value to the appropriate
161  * integer type for the needed precision.  The rendering code which
162  * deals only with valid coordinates within the bounds of the source
163  * rectangle uses jint.  The setup code, which occasionally deals
164  * with coordinates that run out of bounds, uses jlong.
165  *
166  * Note that the rounding in this calculation is at a fraction of a
167  * source pixel of (1.0 / (1<<shift)) since the scale factor includes
168  * the fractional shift.  As a result, the type of rounding used is
169  * not very significant (floor, floor(x+.5), or ceil(x-.5)), but the
170  * ceil(x-.5) version is used for consistency with the way that pixel
171  * coordinates are rounded to assign the ".5" value to the lower
172  * integer.
173  */
174 #define SRCLOC(id, fo, sf)   (ceil((((id) + 0.5) - (fo)) * (sf) - 0.5))
175 
176 /*
177  * Reverse map a srctarget coordinate into device space and refine the
178  * answer.  More specifically, what we are looking for is the smallest
179  * destination coordinate that maps to a source coordinate that is
180  * greater than or equal to the given target source coordinate.
181  *
182  * Note that since the inner loops use math that maps a destination
183  * coordinate into source space and that, even though the equation
184  * we use below is the theoretical inverse of the dst->src mapping,
185  * we cannot rely on floating point math to guarantee that applying
186  * both of these equations in sequence will give us an exact mapping
187  * of src->dst->src.  Thus, we must search back and forth to see if
188  * we really map back to the given source coordinate and that we are
189  * the smallest destination coordinate that does so.
190  *
191  * Note that, in practice, the answer from the initial guess tends to
192  * be the right answer most of the time and the loop ends up finding
193  * one iteration to be ">= srctarget" and the next to be "< srctarget"
194  * and thus finds the answer in 2 iterations.  A small number of
195  * times, the initial guess is 1 too low and so we do one iteration
196  * at "< srctarget" and the next at ">= srctarget" and again find the
197  * answer in 2 iterations.  All cases encountered during testing ended
198  * up falling into one of those 2 categories and so the loop was always
199  * executed exactly twice.
200  *
201  * Note also that the calculation of srcloc below may attempt to calculate
202  * the src location of the destination pixel which is "1 beyond" the
203  * end of the source image.  Since our shift calculation code in the
204  * main function only guaranteed that "srcw << shift" did not overflow
205  * a 32-bit signed integer, we cannot guarantee that "(srcw+1) << shift"
206  * or, more generally, "(srcw << shift)+srcinc" does not overflow.
207  * As a result, we perform our calculations here with jlong values
208  * so that we aren't affected by this overflow.  Since srcw (shifted)
209  * and srcinc are both 32-bit values, their sum cannot possibly overflow
210  * a jlong.  In fact, we can step up to a couple of billion steps of
211  * size "srcinc" past the end of the image before we have to worry
212  * about overflow - in practice, though, the search never steps more
213  * than 1 past the end of the image so this buffer is more than enough.
214  */
215 static jint
refine(jint intorigin,jdouble dblorigin,jint tilesize,jdouble scale,jint srctarget,jint srcinc)216 refine(jint intorigin, jdouble dblorigin, jint tilesize,
217        jdouble scale, jint srctarget, jint srcinc)
218 {
219     /* Make a first estimate of dest coordinate from srctarget */
220     jint dstloc = (jint) ceil(dblorigin + srctarget / scale - 0.5);
221     /* Loop until we get at least one value < and one >= the target */
222     jboolean wasneg = JNI_FALSE;
223     jboolean waspos = JNI_FALSE;
224     jlong lsrcinc = srcinc;
225     jlong lsrctarget = srctarget;
226 
227     while (JNI_TRUE) {
228         /*
229          * Find src coordinate from dest coordinate using the same
230          * math we will use below when iterating over tiles.
231          */
232         jint tilestart = TILESTART(dstloc, intorigin, tilesize);
233         jlong lsrcloc = (jlong) SRCLOC(tilestart, dblorigin, scale);
234         if (dstloc > tilestart) {
235             lsrcloc += lsrcinc * ((jlong) dstloc - tilestart);
236         }
237         if (lsrcloc >= lsrctarget) {
238             /*
239              * If we were previously less than target, then the current
240              * dstloc is the smallest dst which maps >= the target.
241              */
242             if (wasneg) break;
243             dstloc--;
244             waspos = JNI_TRUE;
245         } else {
246             /*
247              * If we were previously greater than target, then this must
248              * be the first dstloc which maps to < the target.  Since we
249              * want the smallest which maps >= the target, increment it
250              * first before returning.
251              */
252             dstloc++;
253             if (waspos) break;
254             wasneg = JNI_TRUE;
255         }
256     }
257     return dstloc;
258 }
259 
260 /*
261  * Class:     sun_java2d_loops_ScaledBlit
262  * Method:    Scale
263  * Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;IIIIDDDD)V
264  */
265 JNIEXPORT void JNICALL
Java_sun_java2d_loops_ScaledBlit_Scale(JNIEnv * env,jobject self,jobject srcData,jobject dstData,jobject comp,jobject clip,jint sx1,jint sy1,jint sx2,jint sy2,jdouble ddx1,jdouble ddy1,jdouble ddx2,jdouble ddy2)266 Java_sun_java2d_loops_ScaledBlit_Scale
267     (JNIEnv *env, jobject self,
268      jobject srcData, jobject dstData,
269      jobject comp, jobject clip,
270      jint sx1, jint sy1, jint sx2, jint sy2,
271      jdouble ddx1, jdouble ddy1, jdouble ddx2, jdouble ddy2)
272 {
273     SurfaceDataOps *srcOps;
274     SurfaceDataOps *dstOps;
275     SurfaceDataRasInfo srcInfo;
276     SurfaceDataRasInfo dstInfo;
277     NativePrimitive *pPrim;
278     CompositeInfo compInfo;
279     jint sxinc, syinc, shift;
280     jint tilesize;
281     jint idx1, idy1;
282     jdouble scalex, scaley;
283     RegionData clipInfo;
284     jint dstFlags;
285     jboolean xunderflow, yunderflow;
286 
287     pPrim = GetNativePrim(env, self);
288     if (pPrim == NULL) {
289         return;
290     }
291     if (pPrim->pCompType->getCompInfo != NULL) {
292         (*pPrim->pCompType->getCompInfo)(env, &compInfo, comp);
293     }
294     if (Region_GetInfo(env, clip, &clipInfo)) {
295         return;
296     }
297 
298     srcOps = SurfaceData_GetOps(env, srcData);
299     if (srcOps == 0) {
300         return;
301     }
302     dstOps = SurfaceData_GetOps(env, dstData);
303     if (dstOps == 0) {
304         return;
305     }
306 
307     /*
308      * Determine the precision to use for the fixed point math
309      * for the coordinate scaling.
310      * - OR together srcw and srch to get the MSB between the two
311      * - Next shift it up until it goes negative
312      * - Count the shifts and that will be the most accurate
313      *   precision available for the fixed point math
314      * - a source coordinate of 1.0 will be (1 << shift)
315      * - srcw & srch will be (srcw << shift) and (srch << shift)
316      *   and will not overflow
317      * Note that if srcw or srch are so large that they are
318      * negative numbers before shifting, then:
319      * - shift will be 0
320      * - tilesize will end up being 1x1 tiles
321      * - we will brute force calculate the source location
322      *   of every destination pixel using the TILESTART and
323      *   SRCLOC macros in this function and then call the
324      *   scale helper function to copy one pixel at a time.
325      * - TILESTART involves mostly jdouble calculations so
326      *   it should not have integer overflow problems.
327      */
328     sxinc = (sx2 - sx1) | (sy2 - sy1);
329     shift = 0;
330     if (sxinc > 0) {
331         while ((sxinc <<= 1) > 0) {
332             shift++;
333         }
334     }
335     /*
336      * Now determine the scaled integer increments used to traverse
337      * the source image for each destination pixel.  Our shift value
338      * has been calculated above so that any location within the
339      * destination image can be represented as a scaled integer
340      * without incurring integer overflow.
341      *
342      * But we also need to worry about overflow of the sxinc and syinc
343      * parameters.  We already know that "srcw<<shift" and "srch<<shift"
344      * cannot overflow a jint, and the only time that sxinc and syinc
345      * can be larger than those two values is if ddy2-ddy1 or ddx2-ddx1
346      * are smaller than 1.  Since this situation implies that the
347      * output area is no more than one pixel wide or tall, then we are
348      * stepping by distances that are at least the size of the image
349      * and only one destination pixel will ever be rendered - thus the
350      * amount by which we step is largely irrelevant since after
351      * drawing the first "in bounds" pixel, we will step completely
352      * out of the source image and render nothing more.  As a result,
353      * we assign the appropriate "size of image" stepping parameter
354      * for any scale to smaller than one device pixel.
355      */
356     yunderflow = (ddy2 - ddy1) < 1.0;
357     scaley = (((jdouble) (sy2 - sy1)) / (ddy2 - ddy1)) * (1 << shift);
358     syinc = (yunderflow ? ((sy2 - sy1) << shift) : (jint) scaley);
359     xunderflow = (ddx2 - ddx1) < 1.0;
360     scalex = (((jdouble) (sx2 - sx1)) / (ddx2 - ddx1)) * (1 << shift);
361     sxinc = (xunderflow ? ((sx2 - sx1) << shift) : (jint) scalex);
362     tilesize = findpow2tilesize(shift, sxinc, syinc);
363 
364 
365     srcInfo.bounds.x1 = sx1;
366     srcInfo.bounds.y1 = sy1;
367     srcInfo.bounds.x2 = sx2;
368     srcInfo.bounds.y2 = sy2;
369     if (srcOps->Lock(env, srcOps, &srcInfo, pPrim->srcflags) != SD_SUCCESS) {
370         return;
371     }
372     if (srcInfo.bounds.x2 <= srcInfo.bounds.x1 ||
373         srcInfo.bounds.y2 <= srcInfo.bounds.y1)
374     {
375         SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
376         return;
377     }
378 
379     /*
380      * Only refine lower bounds if lower source coordinate was clipped
381      * because the math will work out to be exactly idx1, idy1 if not.
382      * Always refine upper bounds since we want to make sure not to
383      * overstep the source bounds based on the tiled iteration math.
384      *
385      * For underflow cases, simply check if the SRCLOC for the single
386      * destination pixel maps inside the source bounds.  If it does,
387      * we render that pixel row or column (and only that pixel row
388      * or column).  If it does not, we render nothing.
389      */
390     idx1 = (jint) ceil(ddx1 - 0.5);
391     idy1 = (jint) ceil(ddy1 - 0.5);
392     if (xunderflow) {
393         jdouble x = sx1 + (SRCLOC(idx1, ddx1, scalex) / (1 << shift));
394         dstInfo.bounds.x1 = dstInfo.bounds.x2 = idx1;
395         if (x >= srcInfo.bounds.x1 && x < srcInfo.bounds.x2) {
396             dstInfo.bounds.x2++;
397         }
398     } else {
399         dstInfo.bounds.x1 = ((srcInfo.bounds.x1 <= sx1)
400                              ? idx1
401                              : refine(idx1, ddx1, tilesize, scalex,
402                                       (srcInfo.bounds.x1-sx1) << shift, sxinc));
403         dstInfo.bounds.x2 = refine(idx1, ddx1, tilesize, scalex,
404                                    (srcInfo.bounds.x2-sx1) << shift, sxinc);
405     }
406     if (yunderflow) {
407         jdouble y = sy1 + (SRCLOC(idy1, ddy1, scaley) / (1 << shift));
408         dstInfo.bounds.y1 = dstInfo.bounds.y2 = idy1;
409         if (y >= srcInfo.bounds.y1 && y < srcInfo.bounds.y2) {
410             dstInfo.bounds.y2++;
411         }
412     } else {
413         dstInfo.bounds.y1 = ((srcInfo.bounds.y1 <= sy1)
414                              ? idy1
415                              : refine(idy1, ddy1, tilesize, scaley,
416                                       (srcInfo.bounds.y1-sy1) << shift, syinc));
417         dstInfo.bounds.y2 = refine(idy1, ddy1, tilesize, scaley,
418                                    (srcInfo.bounds.y2-sy1) << shift, syinc);
419     }
420 
421     SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
422     dstFlags = pPrim->dstflags;
423     if (!Region_IsRectangular(&clipInfo)) {
424         dstFlags |= SD_LOCK_PARTIAL_WRITE;
425     }
426     if (dstOps->Lock(env, dstOps, &dstInfo, dstFlags) != SD_SUCCESS) {
427         SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
428         return;
429     }
430 
431     if (dstInfo.bounds.x2 > dstInfo.bounds.x1 &&
432         dstInfo.bounds.y2 > dstInfo.bounds.y1)
433     {
434         srcOps->GetRasInfo(env, srcOps, &srcInfo);
435         dstOps->GetRasInfo(env, dstOps, &dstInfo);
436         if (srcInfo.rasBase && dstInfo.rasBase) {
437             SurfaceDataBounds span;
438             void *pSrc = PtrCoord(srcInfo.rasBase,
439                                   sx1, srcInfo.pixelStride,
440                                   sy1, srcInfo.scanStride);
441 
442             Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
443             Region_StartIteration(env, &clipInfo);
444             if (tilesize >= (ddx2 - ddx1) &&
445                 tilesize >= (ddy2 - ddy1))
446             {
447                 /* Do everything in one tile */
448                 jint sxloc = (jint) SRCLOC(idx1, ddx1, scalex);
449                 jint syloc = (jint) SRCLOC(idy1, ddy1, scaley);
450                 while (Region_NextIteration(&clipInfo, &span)) {
451                     jint tsxloc = sxloc;
452                     jint tsyloc = syloc;
453                     void *pDst;
454 
455                     if (span.y1 > idy1) {
456                         tsyloc += syinc * (span.y1 - idy1);
457                     }
458                     if (span.x1 > idx1) {
459                         tsxloc += sxinc * (span.x1 - idx1);
460                     }
461 
462                     pDst = PtrCoord(dstInfo.rasBase,
463                                     span.x1, dstInfo.pixelStride,
464                                     span.y1, dstInfo.scanStride);
465                     (*pPrim->funcs.scaledblit)(pSrc, pDst,
466                                                span.x2-span.x1, span.y2-span.y1,
467                                                tsxloc, tsyloc,
468                                                sxinc, syinc, shift,
469                                                &srcInfo, &dstInfo,
470                                                pPrim, &compInfo);
471                 }
472             } else {
473                 /* Break each clip span into tiles for better accuracy. */
474                 while (Region_NextIteration(&clipInfo, &span)) {
475                     jint tilex, tiley;
476                     jint sxloc, syloc;
477                     jint x1, y1, x2, y2;
478                     void *pDst;
479 
480                     for (tiley = TILESTART(span.y1, idy1, tilesize);
481                          tiley < span.y2;
482                          tiley += tilesize)
483                     {
484                         /* Clip span to Y range of current tile */
485                         y1 = tiley;
486                         y2 = tiley + tilesize;
487                         if (y1 < span.y1) y1 = span.y1;
488                         if (y2 > span.y2) y2 = span.y2;
489 
490                         /* Find scaled source coordinate of first pixel */
491                         syloc = (jint) SRCLOC(tiley, ddy1, scaley);
492                         if (y1 > tiley) {
493                             syloc += syinc * (y1 - tiley);
494                         }
495 
496                         for (tilex = TILESTART(span.x1, idx1, tilesize);
497                              tilex < span.x2;
498                              tilex += tilesize)
499                         {
500                             /* Clip span to X range of current tile */
501                             x1 = tilex;
502                             x2 = tilex + tilesize;
503                             if (x1 < span.x1) x1 = span.x1;
504                             if (x2 > span.x2) x2 = span.x2;
505 
506                             /* Find scaled source coordinate of first pixel */
507                             sxloc = (jint) SRCLOC(tilex, ddx1, scalex);
508                             if (x1 > tilex) {
509                                 sxloc += sxinc * (x1 - tilex);
510                             }
511 
512                             pDst = PtrCoord(dstInfo.rasBase,
513                                             x1, dstInfo.pixelStride,
514                                             y1, dstInfo.scanStride);
515                             (*pPrim->funcs.scaledblit)(pSrc, pDst, x2-x1, y2-y1,
516                                                        sxloc, syloc,
517                                                        sxinc, syinc, shift,
518                                                        &srcInfo, &dstInfo,
519                                                        pPrim, &compInfo);
520                         }
521                     }
522                 }
523             }
524             Region_EndIteration(env, &clipInfo);
525         }
526         SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
527         SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
528     }
529     SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
530     SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
531 }
532