1 /*
2  * Copyright (c) 2007, 2008, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 /**
27  * This file contains a standalone program that is used to generate the
28  * D3DShaders.h file.  The program invokes the fxc (D3D Shader Compiler)
29  * utility, which is part of the DirectX 9/10 SDK.  Since most JDK
30  * developers (other than some Java 2D engineers) do not have the full DXSDK
31  * installed, and since we do not want to make the JDK build process
32  * dependent on the full DXSDK installation, we have chosen not to make
33  * this shader compilation step part of the build process.  Instead, it is
34  * only necessary to compile and run this program when changes need to be
35  * made to the shader code contained within.  Typically, this only happens
36  * on an as-needed basis by someone familiar with the D3D pipeline.  Running
37  * this program is fairly straightforward:
38  *
39  *   % rm D3DShaders.h
40  *   % cl D3DShaderGen.c
41  *   % D3DShaderGen.exe
42  *
43  * (And don't forget to putback the updated D3DShaders.h file!)
44  */
45 
#include <stdio.h>
#include <string.h>

#include <process.h>
#include <Windows.h>
49 
/* Stream that receives all generated shader text; it starts out NULL. */
static FILE *fpHeader = NULL;
/* File name of the generated shader header. */
static char *strHeaderFile = "D3DShaders.h";
52 
/**
 * Tests flagbit against a variable named "flags", which must be in scope
 * wherever the macro is used.  Yields 1 if any bit of flagbit is set in
 * flags, and 0 otherwise.
 */
#define IS_SET(flagbit) (0 != ((flags) & (flagbit)))
56 
// REMIND: tracing is compiled out (both macros expand to nothing, so their
// arguments are never evaluated); the fprintf-based variants are kept here
// for reference
//#define J2dTraceLn(a, b) fprintf(stderr, "%s\n", b);
//#define J2dTraceLn1(a, b, c) fprintf(stderr, b, c);
#define J2dTraceLn(level, msg)
#define J2dTraceLn1(level, fmt, arg)
62 
63 /************************* General shader support ***************************/
64 
65 static void
D3DShaderGen_WriteShader(char * source,char * target,char * name,int flags)66 D3DShaderGen_WriteShader(char *source, char *target, char *name, int flags)
67 {
68     FILE *fpTmp;
69     char varname[50];
70     char *args[8];
71     int val;
72 
73     // write source to tmp.hlsl
74     fpTmp = fopen("tmp.hlsl", "w");
75     fprintf(fpTmp, "%s\n", source);
76     fclose(fpTmp);
77 
78     {
79         PROCESS_INFORMATION pi;
80         STARTUPINFO si;
81         char pargs[300];
82         sprintf(pargs,
83                 "c:\\progra~1\\mi5889~1\\utilit~1\\bin\\x86\\fxc.exe "
84                 "/T %s /Vn %s%d /Fh tmp.h tmp.hlsl",
85                 // uncomment the following line to generate debug
86                 // info in the shader header file (may be useful
87                 // for testing/debuggging purposes, but it nearly
88                 // doubles the size of the header file and compiled
89                 // shader programs - off for production builds)
90                 //"/Zi /T %s /Vn %s%d /Fh tmp.h tmp.hlsl",
91                 target, name, flags);
92         fprintf(stderr, "%s\n", pargs);
93         memset(&si, 0, sizeof(si));
94         si.cb = sizeof(si);
95         si.dwFlags = STARTF_USESTDHANDLES;
96         //si.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE);
97         //fprintf(stderr, "%s\n", pargs);
98         val = CreateProcess(0, pargs, 0, 0, TRUE,
99                             CREATE_NO_WINDOW, NULL, NULL, &si, &pi);
100 
101         {
102             DWORD code;
103             do {
104                 GetExitCodeProcess(pi.hProcess, &code);
105                 //fprintf(stderr, "waiting...");
106                 Sleep(100);
107             } while (code == STILL_ACTIVE);
108 
109             if (code != 0) {
110                 fprintf(stderr, "fxc failed for %s%d\n", name, flags);
111             }
112         }
113 
114         CloseHandle(pi.hThread);
115         CloseHandle(pi.hProcess);
116     }
117 
118     // append tmp.h to D3DShaders.h
119     {
120         int ch;
121         fpTmp = fopen("tmp.h", "r");
122         while ((ch = fgetc(fpTmp)) != EOF) {
123             fputc(ch, fpHeader);
124         }
125         fclose(fpTmp);
126     }
127 }
128 
/**
 * Convenience wrapper around D3DShaderGen_WriteShader() that always
 * compiles for the "ps_2_0" target.
 */
static void
D3DShaderGen_WritePixelShader(char *source, char *name, int flags)
{
    char *pixelTarget = "ps_2_0";

    D3DShaderGen_WriteShader(source, pixelTarget, name, flags);
}
134 
/** Mask covering the two low-order flag bits that hold the CycleMethod. */
#define MULTI_GRAD_CYCLE_METHOD (3 << 0)
/** Isolates the CycleMethod value (bits 0-1) of the given flags value. */
#define EXTRACT_CYCLE_METHOD(flags) ((flags) & MULTI_GRAD_CYCLE_METHOD)
139 
140 static void
D3DShaderGen_WriteShaderArray(char * name,int num)141 D3DShaderGen_WriteShaderArray(char *name, int num)
142 {
143     char array[5000];
144     char elem[30];
145     int i;
146 
147     sprintf(array, "const DWORD *%sShaders[] =\n{\n", name);
148     for (i = 0; i < num; i++) {
149         if (num == 32 && EXTRACT_CYCLE_METHOD(i) == 3) {
150             // REMIND: what a hack!
151             sprintf(elem, "    NULL,\n");
152         } else {
153             sprintf(elem, "    %s%d,\n", name, i);
154         }
155         strcat(array, elem);
156     }
157     strcat(array, "};\n");
158 
159     // append to D3DShaders.h
160     fprintf(fpHeader, "%s\n", array);
161 }
162 
163 /**************************** ConvolveOp support ****************************/
164 
/*
 * HLSL template for the ConvolveOp pixel shader.  This is a printf-style
 * format string whose arguments are, in order: the kernel array size (%d),
 * the HLSL expression assigned to edge pixels (%s), and the kernel loop
 * bound (%d).
 */
static const char *convolveShaderSource =
    // the image being convolved
    "sampler2D baseImage   : register(s0);"
    // limits of the image interior:
    //   imgEdge.xy = imgMin.xy (anything < will be treated as edge case)
    //   imgEdge.zw = imgMax.xy (anything > will be treated as edge case)
    "float4 imgEdge        : register(c0);"
    // one entry per location in the convolution kernel:
    //   kernelVals[i].x = offsetX[i]
    //   kernelVals[i].y = offsetY[i]
    //   kernelVals[i].z = kernel[i]
    "float3 kernelVals[%d] : register(c1);"
    ""
    "void main(in float2 tc : TEXCOORD0,"
    "          inout float4 color : COLOR0)"
    "{"
    "    float4 sum = imgEdge - tc.xyxy;"
    ""
    "    if (sum.x > 0 || sum.y > 0 || sum.z < 0 || sum.w < 0) {"
             // (the edge condition expression is substituted here)
    "        color = %s;"
    "    } else {"
    "        int i;"
    "        sum = float4(0, 0, 0, 0);"
    "        for (i = 0; i < %d; i++) {"
    "            sum +="
    "                kernelVals[i].z *"
    "                tex2D(baseImage, tc + kernelVals[i].xy);"
    "        }"
             // multiply into the current color so that extra alpha applies
    "        color *= sum;"
    "    }"
    ""
    "}";
199 
/**
 * Bit flags, combined with bitwise-or, that select which variant of the
 * ConvolveOp shader source is generated.  MAX_CONVOLVE equals the number
 * of distinct flag combinations.
 */
#define CONVOLVE_EDGE_ZERO_FILL (1 << 0)
#define CONVOLVE_5X5            (1 << 1)
#define MAX_CONVOLVE            (1 << 2)
207 
208 static void
D3DShaderGen_GenerateConvolveShader(int flags)209 D3DShaderGen_GenerateConvolveShader(int flags)
210 {
211     int kernelMax = IS_SET(CONVOLVE_5X5) ? 25 : 9;
212     char *edge;
213     char finalSource[2000];
214 
215     J2dTraceLn1(J2D_TRACE_INFO,
216                 "D3DShaderGen_GenerateConvolveShader: flags=%d",
217                 flags);
218 
219     if (IS_SET(CONVOLVE_EDGE_ZERO_FILL)) {
220         // EDGE_ZERO_FILL: fill in zero at the edges
221         edge = "float4(0, 0, 0, 0)";
222     } else {
223         // EDGE_NO_OP: use the source pixel color at the edges
224         edge = "tex2D(baseImage, tc)";
225     }
226 
227     // compose the final source code string from the various pieces
228     sprintf(finalSource, convolveShaderSource,
229             kernelMax, edge, kernelMax);
230 
231     D3DShaderGen_WritePixelShader(finalSource, "convolve", flags);
232 }
233 
234 /**************************** RescaleOp support *****************************/
235 
/*
 * HLSL template for the RescaleOp pixel shader.  This is a printf-style
 * format string taking two %s arguments: the statement run before the
 * rescale (un-premultiply) and the one run after it (re-premultiply).
 */
static const char *rescaleShaderSource =
    // the image being rescaled
    "sampler2D baseImage : register(s0);"
    // per-component scale factors
    "float4 scaleFactors : register(c0);"
    // per-component offsets
    "float4 offsets      : register(c1);"
    ""
    "void main(in float2 tc : TEXCOORD0,"
    "          inout float4 color : COLOR0)"
    "{"
    "    float4 srcColor = tex2D(baseImage, tc);"
    ""
         // (the un-premult code is substituted here)
    "    %s"
    ""
         // apply the rescale to the source value
    "    float4 result = (srcColor * scaleFactors) + offsets;"
    ""
         // (the re-premult code is substituted here)
    "    %s"
    ""
         // multiply into the current color so that extra alpha applies
    "    color *= result;"
    "}";
261 
/**
 * Bit flags, combined with bitwise-or, that select which variant of the
 * RescaleOp shader source is generated.  MAX_RESCALE equals the number of
 * distinct flag combinations.
 */
#define RESCALE_NON_PREMULT (1 << 0)
#define MAX_RESCALE         (1 << 1)
268 
269 static void
D3DShaderGen_GenerateRescaleShader(int flags)270 D3DShaderGen_GenerateRescaleShader(int flags)
271 {
272     char *preRescale = "";
273     char *postRescale = "";
274     char finalSource[2000];
275 
276     J2dTraceLn1(J2D_TRACE_INFO,
277                 "D3DShaderGen_GenerateRescaleShader: flags=%d",
278                 flags);
279 
280     if (IS_SET(RESCALE_NON_PREMULT)) {
281         preRescale  = "srcColor.rgb /= srcColor.a;";
282         postRescale = "result.rgb *= result.a;";
283     }
284 
285     // compose the final source code string from the various pieces
286     sprintf(finalSource, rescaleShaderSource,
287             preRescale, postRescale);
288 
289     D3DShaderGen_WritePixelShader(finalSource, "rescale", flags);
290 }
291 
292 /**************************** LookupOp support ******************************/
293 
/*
 * HLSL template for the LookupOp pixel shader.  This is a printf-style
 * format string taking three %s arguments, in order: the un-premultiply
 * statement, the statement that stores result.a, and the re-premultiply
 * statement.
 */
static const char *lookupShaderSource =
    // the source image (sampler s0)
    "sampler2D baseImage   : register(s0);"
    // the lookup table (sampler s1)
    "sampler2D lookupTable : register(s1);"
    // value subtracted from the source index before the lookup
    "float4 offset         : register(c0);"
    ""
    "void main(in float2 tc : TEXCOORD0,"
    "          inout float4 color : COLOR0)"
    "{"
    "    float4 srcColor = tex2D(baseImage, tc);"
         // (the un-premult code is substituted here)
    "    %s"
         // shift the original index by the offset
    "    float4 srcIndex = srcColor - offset;"
         // feed the source value into the lookup table (note that the
         // "v" texcoords are hardcoded to hit texel centers of
         // each row/band in texture)
    "    float4 result;"
    "    result.r = tex2D(lookupTable, float2(srcIndex.r, 0.125)).r;"
    "    result.g = tex2D(lookupTable, float2(srcIndex.g, 0.375)).r;"
    "    result.b = tex2D(lookupTable, float2(srcIndex.b, 0.625)).r;"
         // (the alpha store code is substituted here)
    "    %s"
         // (the re-premult code is substituted here)
    "    %s"
         // multiply into the current color so that extra alpha applies
    "    color *= result;"
    "}";
324 
/**
 * Bit flags, combined with bitwise-or, that select which variant of the
 * LookupOp shader source is generated.  MAX_LOOKUP equals the number of
 * distinct flag combinations.
 */
#define LOOKUP_USE_SRC_ALPHA (1 << 0)
#define LOOKUP_NON_PREMULT   (1 << 1)
#define MAX_LOOKUP           (1 << 2)
332 
333 static void
D3DShaderGen_GenerateLookupShader(int flags)334 D3DShaderGen_GenerateLookupShader(int flags)
335 {
336     char *alpha;
337     char *preLookup = "";
338     char *postLookup = "";
339     char finalSource[2000];
340 
341     J2dTraceLn1(J2D_TRACE_INFO,
342                 "D3DShaderGen_GenerateLookupShader: flags=%d",
343                 flags);
344 
345     if (IS_SET(LOOKUP_USE_SRC_ALPHA)) {
346         // when numComps is 1 or 3, the alpha is not looked up in the table;
347         // just keep the alpha from the source fragment
348         alpha = "result.a = srcColor.a;";
349     } else {
350         // when numComps is 4, the alpha is looked up in the table, just
351         // like the other color components from the source fragment
352         alpha = "result.a = tex2D(lookupTable, float2(srcIndex.a, 0.875)).r;";
353     }
354     if (IS_SET(LOOKUP_NON_PREMULT)) {
355         preLookup  = "srcColor.rgb /= srcColor.a;";
356         postLookup = "result.rgb *= result.a;";
357     }
358 
359     // compose the final source code string from the various pieces
360     sprintf(finalSource, lookupShaderSource,
361             preLookup, alpha, postLookup);
362 
363     D3DShaderGen_WritePixelShader(finalSource, "lookup", flags);
364 }
365 
366 /************************* GradientPaint support ****************************/
367 
/*
 * HLSL template for the basic (two-color) GradientPaint pixel shader.
 * This is a printf-style format string whose arguments are, in order:
 * the mask variable declaration (%s), the mask texcoord input (%s), the
 * TEXCOORD index of winCoord (%d), the cycle code (%s), and the mask
 * modulation code (%s).
 *
 * To simplify the code and to make it easier to upload a number of
 * uniform values at once, a bunch of scalar (float) values are packed
 * into a single float3.  Here's how the values are related:
 *
 *   params.x = p0
 *   params.y = p1
 *   params.z = p3
 */
static const char *basicGradientShaderSource =
    "float3 params : register (c0);"
    "float4 color1 : register (c1);"
    "float4 color2 : register (c2);"
    // (the mask variable is substituted here)
    "%s"
    ""
    // (the mask texcoord input is substituted here)
    "void main(%s"
    "          in float4 winCoord : TEXCOORD%d,"
    "          inout float4 color : COLOR0)"
    "{"
    "    float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);"
    "    float dist = dot(params.xyz, fragCoord);"
    ""
         // the setup code for p0/p1/p3 translates/scales to hit texel
         // centers (at 0.25 and 0.75) because it is needed for the
         // original/fast texture-based implementation, but it is not
         // desirable for this shader-based implementation, so the
         // value is re-transformed here...
    "    dist = (dist - 0.25) * 2.0;"
    ""
    "    float fraction;"
         // (the cycle code is substituted here)
    "    %s"
    ""
    "    float4 result = lerp(color1, color2, fraction);"
    ""
         // (the mask modulation code is substituted here)
    "    %s"
    ""
         // multiply into the current color so that extra alpha applies
    "    color *= result;"
    "}";
411 
/**
 * Bit flags, combined with bitwise-or, that select which variant of the
 * basic gradient shader source is generated.  MAX_BASIC_GRAD equals the
 * number of distinct flag combinations.
 */
#define BASIC_GRAD_IS_CYCLIC (1 << 0)
#define BASIC_GRAD_USE_MASK  (1 << 1)
#define MAX_BASIC_GRAD       (1 << 2)
419 
420 static void
D3DShaderGen_GenerateBasicGradShader(int flags)421 D3DShaderGen_GenerateBasicGradShader(int flags)
422 {
423     int colorSampler = IS_SET(BASIC_GRAD_USE_MASK) ? 1 : 0;
424     char *cycleCode;
425     char *maskVars = "";
426     char *maskInput = "";
427     char *maskCode = "";
428     char finalSource[3000];
429 
430     J2dTraceLn1(J2D_TRACE_INFO,
431                 "D3DShaderGen_GenerateBasicGradShader",
432                 flags);
433 
434     if (IS_SET(BASIC_GRAD_IS_CYCLIC)) {
435         cycleCode =
436             "fraction = 1.0 - (abs(frac(dist * 0.5) - 0.5) * 2.0);";
437     } else {
438         cycleCode =
439             "fraction = clamp(dist, 0.0, 1.0);";
440     }
441 
442     if (IS_SET(BASIC_GRAD_USE_MASK)) {
443         /*
444          * This code modulates the calculated result color with the
445          * corresponding alpha value from the alpha mask texture active
446          * on texture unit 0.  Only needed when useMask is true (i.e., only
447          * for MaskFill operations).
448          */
449         maskVars = "sampler2D mask : register(s0);";
450         maskInput = "in float4 maskCoord : TEXCOORD0,";
451         maskCode = "result *= tex2D(mask, maskCoord.xy).a;";
452     }
453 
454     // compose the final source code string from the various pieces
455     sprintf(finalSource, basicGradientShaderSource,
456             maskVars, maskInput, colorSampler, cycleCode, maskCode);
457 
458     D3DShaderGen_WritePixelShader(finalSource, "grad", flags);
459 }
460 
461 /****************** Shared MultipleGradientPaint support ********************/
462 
/**
 * Cycle method codes.  They are identical to the constants of the
 * MultipleGradientPaint.CycleMethod enum and are copied here for
 * convenience (ideally they would be pulled directly from the Java level,
 * but that entails more hassle than it is worth).
 */
#define CYCLE_NONE    0
#define CYCLE_REFLECT 1
#define CYCLE_REPEAT  2
472 
/**
 * Bit flags, combined with bitwise-or, that control how the
 * MultipleGradientPaint shader source code is generated:
 *
 *   MULTI_GRAD_CYCLE_METHOD
 *     Placeholder (bits 0-1) for the CycleMethod enum constant.  It is
 *     defined earlier in this file, next to EXTRACT_CYCLE_METHOD.
 *
 *   MULTI_GRAD_LARGE
 *     If set, use the (slower) shader that supports a larger number of
 *     gradient colors; otherwise, use the optimized codepath.  See
 *     the MAX_FRACTIONS_SMALL/LARGE constants for more details.
 *
 *   MULTI_GRAD_USE_MASK
 *     If set, apply the alpha mask value to the final color result (only
 *     used in the MaskFill case).  The generated shader samples the mask
 *     from register s0.
 *
 *   MULTI_GRAD_LINEAR_RGB
 *     If set, convert the linear RGB result back into the sRGB color space.
 */
//#define MULTI_GRAD_CYCLE_METHOD (3 << 0)
#define MULTI_GRAD_LARGE        (1 << 2)
#define MULTI_GRAD_USE_MASK     (1 << 3)
#define MULTI_GRAD_LINEAR_RGB   (1 << 4)

// REMIND
#define MAX_MULTI_GRAD     (1 << 5)
499 
500 /** Extracts the CycleMethod enum value from the given flags variable. */
501 //#define EXTRACT_CYCLE_METHOD(flags) \
502 //    ((flags) & MULTI_GRAD_CYCLE_METHOD)
503 
/**
 * Upper limits on the number of gradient "stops" handled by the fragment
 * shader and related code.  MAX_FRACTIONS_LARGE applies when the
 * MULTI_GRAD_LARGE flag is set and MAX_FRACTIONS_SMALL applies otherwise.
 * Keeping two separate values allows one highly optimized shader (SMALL)
 * that supports only a few fractions/colors, plus another, less optimal
 * shader that supports more stops.
 */
#define MAX_FRACTIONS 8
#define MAX_FRACTIONS_LARGE MAX_FRACTIONS
#define MAX_FRACTIONS_SMALL 4
515 
/**
 * Upper limit on the number of gradient colors handled by all of the
 * gradient fragment shaders.  The value must be a power of two because it
 * determines the size of the 1D gradient texture (it is substituted for
 * TEXTURE_SIZE in the multi-stop shader source).  It must also be greater
 * than or equal to MAX_FRACTIONS, although there is no strict requirement
 * that the two values be equal.
 */
#define MAX_COLORS 16
524 
/*
 * HLSL template shared by the multi-stop (linear and radial) gradient
 * pixel shaders.  This is a printf-style format string whose arguments
 * are, in order: texture size (%d), maximum fraction count (%d), sampler
 * register index of the color texture (%d), mask variable (%s),
 * paint-specific variables (%s), mask texcoord input (%s), TEXCOORD index
 * of winCoord (%d), paint-specific distance code (%s), cycle method code
 * (%s), color space conversion code (%s), and mask modulation code (%s).
 */
static const char *multiGradientShaderSource =
    // size of the gradient texture, in texels
    "#define TEXTURE_SIZE  %d\n"
    // largest number of fractions/colors this shader supports
    "#define MAX_FRACTIONS %d\n"
    // width of one texel
    "#define FULL_TEXEL    (1.0 / float(TEXTURE_SIZE))\n"
    // width of half a texel
    "#define HALF_TEXEL    (FULL_TEXEL / 2.0)\n"
    // the texture that holds the gradient colors
    "sampler2D colors                : register (s%d);"
    // gradient stops/fractions paired with their scale factors:
    //   fractions[i].x = gradientStop[i]
    //   fractions[i].y = scaleFactor[i]
    "float2 fractions[MAX_FRACTIONS] : register (c0);"
    // (the mask variable is substituted here)
    "%s"
    // (the Linear/RadialGP-specific variables are substituted here)
    "%s"
    ""
    // (the mask texcoord input is substituted here)
    "void main(%s"
    "          in float4 winCoord : TEXCOORD%d,"
    "          inout float4 color : COLOR0)"
    "{"
    "    float dist;"
         // (the Linear/RadialGradientPaint-specific code is substituted here)
    "    %s"
    ""
    "    float4 result;"
         // (the CycleMethod-specific code is substituted here)
    "    %s"
    ""
         // (the ColorSpace conversion code is substituted here)
    "    %s"
    ""
         // (the mask modulation code is substituted here)
    "    %s"
    ""
         // multiply into the current color so that extra alpha applies
    "    color *= result;"
    "}";
567 
/*
 * HLSL fragment that turns dist into the final gradient color; it is
 * plugged into the texcoord placeholder of the cycle method templates.
 *
 * Note: An earlier version of this code would simply calculate a single
 * texcoord:
 *     "tc = HALF_TEXEL + (FULL_TEXEL * relFraction);"
 * and then use that value to do a single texture lookup, taking advantage
 * of the LINEAR texture filtering mode which in theory will do the
 * appropriate linear interpolation between adjacent texels, like this:
 *     "float4 result = tex2D(colors, float2(tc, 0.5));"
 *
 * The problem with that approach is that on certain hardware (from ATI,
 * notably) the LINEAR texture fetch unit has low precision, and would
 * for instance only produce 64 distinct grayscales between white and black,
 * instead of the expected 256.  The visual banding caused by this issue
 * is severe enough to likely cause complaints from developers, so the
 * approach below instead manually fetches the two relevant neighboring
 * texels and then performs the linear interpolation using the lerp()
 * instruction (which does not suffer from the precision issues of the
 * fixed-function texture filtering unit).  This approach requires a few
 * more instructions and is therefore slightly slower than the old
 * approach (not more than 10% or so).
 */
static const char *texCoordCalcCode =
    "int i;"
    "float relFraction = 0.0;"
    "for (i = 0; i < MAX_FRACTIONS-1; i++) {"
    "    relFraction +="
    "        clamp((dist - fractions[i].x) * fractions[i].y, 0.0, 1.0);"
    "}"
    // offset by half a texel so that the lookup lands on the linearly
    // interpolated color between the two texel centers of interest
    "float intPart = floor(relFraction);"
    "float tc1 = HALF_TEXEL + (FULL_TEXEL * intPart);"
    "float tc2 = HALF_TEXEL + (FULL_TEXEL * (intPart + 1.0));"
    "float4 clr1 = tex2D(colors, float2(tc1, 0.5));"
    "float4 clr2 = tex2D(colors, float2(tc2, 0.5));"
    "result = lerp(clr1, clr2, frac(relFraction));";
604 
/**
 * NO_CYCLE template for the CycleMethod placeholder.  Its single %s
 * argument is the texcoord calculation code.
 */
static const char *noCycleCode =
    "if (dist <= 0.0) {"
    "    result = tex2D(colors, float2(0.0, 0.5));"
    "} else if (dist >= 1.0) {"
    "    result = tex2D(colors, float2(1.0, 0.5));"
    "} else {"
         // (the texcoord calculation is substituted here)
    "    %s"
    "}";
615 
/**
 * REFLECT template for the CycleMethod placeholder.  Its single %s
 * argument is the texcoord calculation code.
 */
static const char *reflectCode =
    "dist = 1.0 - (abs(frac(dist * 0.5) - 0.5) * 2.0);"
    // (the texcoord calculation is substituted here)
    "%s";
621 
/**
 * REPEAT template for the CycleMethod placeholder.  Its single %s
 * argument is the texcoord calculation code.
 */
static const char *repeatCode =
    "dist = frac(dist);"
    // (the texcoord calculation is substituted here)
    "%s";
627 
628 static void
D3DShaderGen_GenerateMultiGradShader(int flags,char * name,char * paintVars,char * distCode)629 D3DShaderGen_GenerateMultiGradShader(int flags, char *name,
630                                      char *paintVars, char *distCode)
631 {
632     char *maskVars = "";
633     char *maskInput = "";
634     char *maskCode = "";
635     char *colorSpaceCode = "";
636     char cycleCode[1500];
637     char finalSource[3000];
638     int colorSampler = IS_SET(MULTI_GRAD_USE_MASK) ? 1 : 0;
639     int cycleMethod = EXTRACT_CYCLE_METHOD(flags);
640     int maxFractions = IS_SET(MULTI_GRAD_LARGE) ?
641         MAX_FRACTIONS_LARGE : MAX_FRACTIONS_SMALL;
642 
643     J2dTraceLn(J2D_TRACE_INFO, "OGLPaints_CreateMultiGradProgram");
644 
645     if (IS_SET(MULTI_GRAD_USE_MASK)) {
646         /*
647          * This code modulates the calculated result color with the
648          * corresponding alpha value from the alpha mask texture active
649          * on texture unit 0.  Only needed when useMask is true (i.e., only
650          * for MaskFill operations).
651          */
652         maskVars = "sampler2D mask : register(s0);";
653         maskInput = "in float4 maskCoord : TEXCOORD0,";
654         maskCode = "result *= tex2D(mask, maskCoord.xy).a;";
655     }
656 
657     if (IS_SET(MULTI_GRAD_LINEAR_RGB)) {
658         /*
659          * This code converts a single pixel in linear RGB space back
660          * into sRGB (note: this code was adapted from the
661          * MultipleGradientPaintContext.convertLinearRGBtoSRGB() method).
662          */
663         colorSpaceCode =
664             "result.rgb = 1.055 * pow(result.rgb, 0.416667) - 0.055;";
665     }
666 
667     if (cycleMethod == CYCLE_NONE) {
668         sprintf(cycleCode, noCycleCode, texCoordCalcCode);
669     } else if (cycleMethod == CYCLE_REFLECT) {
670         sprintf(cycleCode, reflectCode, texCoordCalcCode);
671     } else { // (cycleMethod == CYCLE_REPEAT)
672         sprintf(cycleCode, repeatCode, texCoordCalcCode);
673     }
674 
675     // compose the final source code string from the various pieces
676     sprintf(finalSource, multiGradientShaderSource,
677             MAX_COLORS, maxFractions, colorSampler,
678             maskVars, paintVars, maskInput, colorSampler,
679             distCode, cycleCode, colorSpaceCode, maskCode);
680 
681     D3DShaderGen_WritePixelShader(finalSource, name, flags);
682 }
683 
684 /********************** LinearGradientPaint support *************************/
685 
686 static void
D3DShaderGen_GenerateLinearGradShader(int flags)687 D3DShaderGen_GenerateLinearGradShader(int flags)
688 {
689     char *paintVars;
690     char *distCode;
691 
692     J2dTraceLn1(J2D_TRACE_INFO,
693                 "D3DShaderGen_GenerateLinearGradShader",
694                 flags);
695 
696     /*
697      * To simplify the code and to make it easier to upload a number of
698      * uniform values at once, we pack a bunch of scalar (float) values
699      * into a single float3 below.  Here's how the values are related:
700      *
701      *   params.x = p0
702      *   params.y = p1
703      *   params.z = p3
704      */
705     paintVars =
706         "float3 params : register(c16);";
707     distCode =
708         "float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);"
709         "dist = dot(params.xyz, fragCoord);";
710 
711     D3DShaderGen_GenerateMultiGradShader(flags, "linear",
712                                          paintVars, distCode);
713 }
714 
715 /********************** RadialGradientPaint support *************************/
716 
717 static void
D3DShaderGen_GenerateRadialGradShader(int flags)718 D3DShaderGen_GenerateRadialGradShader(int flags)
719 {
720     char *paintVars;
721     char *distCode;
722 
723     J2dTraceLn1(J2D_TRACE_INFO,
724                 "D3DShaderGen_GenerateRadialGradShader",
725                 flags);
726 
727     /*
728      * To simplify the code and to make it easier to upload a number of
729      * uniform values at once, we pack a bunch of scalar (float) values
730      * into float3 values below.  Here's how the values are related:
731      *
732      *   m0.x = m00
733      *   m0.y = m01
734      *   m0.z = m02
735      *
736      *   m1.x = m10
737      *   m1.y = m11
738      *   m1.z = m12
739      *
740      *   precalc.x = focusX
741      *   precalc.y = 1.0 - (focusX * focusX)
742      *   precalc.z = 1.0 / precalc.z
743      */
744     paintVars =
745         "float3 m0      : register(c16);"
746         "float3 m1      : register(c17);"
747         "float3 precalc : register(c18);";
748 
749     /*
750      * The following code is derived from Daniel Rice's whitepaper on
751      * radial gradient performance (attached to the bug report for 6521533).
752      * Refer to that document as well as the setup code in the Java-level
753      * BufferedPaints.setRadialGradientPaint() method for more details.
754      */
755     distCode =
756         "float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);"
757         "float x = dot(fragCoord, m0);"
758         "float y = dot(fragCoord, m1);"
759         "float xfx = x - precalc.x;"
760         "dist = (precalc.x*xfx + sqrt(xfx*xfx + y*y*precalc.y))*precalc.z;";
761 
762     D3DShaderGen_GenerateMultiGradShader(flags, "radial",
763                                          paintVars, distCode);
764 }
765 
766 /*************************** LCD text support *******************************/
767 
// Selects how the LCD text shader performs gamma adjustment: non-zero uses
// 3D lookup textures, zero uses the pow intrinsic.
// REMIND: the lookup-texture version uses texture addressing operations in
//         a dependency chain that is too complex for the target shader
//         model (ps_2_0) to handle (ugh, I guess we can either require
//         ps_3_0 or just use the slower pow intrinsic)
#define POW_LUT 0
773 
/*
 * HLSL source of the LCD text pixel shader.  Unlike the other shader
 * sources it contains no printf placeholders; the POW_LUT setting picks,
 * at compile time of this tool, between the lookup-texture and the pow()
 * flavor of the gamma adjustment.
 */
static const char *lcdTextShaderSource =
    "float3 srcAdj         : register(c0);"
    "sampler2D glyphTex    : register(s0);"
    "sampler2D dstTex      : register(s1);"
#if POW_LUT
    "sampler3D invgammaTex : register(s2);"
    "sampler3D gammaTex    : register(s3);"
#else
    "float3 invgamma       : register(c1);"
    "float3 gamma          : register(c2);"
#endif
    ""
    "void main(in float2 tc0 : TEXCOORD0,"
    "          in float2 tc1 : TEXCOORD1,"
    "          inout float4 color : COLOR0)"
    "{"
         // fetch the glyph image's RGB value at the current texcoord
    "    float3 glyphClr = tex2D(glyphTex, tc0).rgb;"
    "    if (!any(glyphClr)) {"
             // no coverage at all, so this fragment is skipped
    "        discard;"
    "    }"
         // fetch the RGB value of the corresponding destination pixel
    "    float3 dstClr = tex2D(dstTex, tc1).rgb;"
         // gamma adjust the dest color (invgamma LUT or pow)
#if POW_LUT
    "    float3 dstAdj = tex3D(invgammaTex, dstClr).rgb;"
#else
    "    float3 dstAdj = pow(dstClr, invgamma);"
#endif
         // linearly interpolate the three color values
    "    float3 result = lerp(dstAdj, srcAdj, glyphClr);"
         // gamma re-adjust the resulting color (alpha is always set to 1.0)
#if POW_LUT
    "    color = float4(tex3D(gammaTex, result).rgb, 1.0);"
#else
    "    color = float4(pow(result, gamma), 1.0);"
#endif
    "}";
813 
814 static void
D3DShaderGen_GenerateLCDTextShader()815 D3DShaderGen_GenerateLCDTextShader()
816 {
817     J2dTraceLn(J2D_TRACE_INFO, "D3DShaderGen_GenerateLCDTextShader");
818 
819     D3DShaderGen_WritePixelShader((char *)lcdTextShaderSource, "lcdtext", 0);
820 }
821 
822 /*************************** AA support *******************************/
823 
824 /*
825  * This shader fills the space between an outer and inner parallelogram.
826  * It can be used to draw an outline by specifying both inner and outer
827  * values.  It fills pixels by estimating what portion falls inside the
828  * outer shape, and subtracting an estimate of what portion falls inside
829  * the inner shape.  Specifying both inner and outer values produces a
830  * standard "wide outline".  Specifying an inner shape that falls far
831  * outside the outer shape allows the same shader to fill the outer
832  * shape entirely since pixels that fall within the outer shape are never
833  * inside the inner shape and so they are filled based solely on their
834  * coverage of the outer shape.
835  *
836  * The setup code renders this shader over the bounds of the outer
837  * shape (or the only shape in the case of a fill operation) and
838  * sets the texture 0 coordinates so that 0,0=>0,1=>1,1=>1,0 in those
839  * texture coordinates map to the four corners of the parallelogram.
840  * Similarly the texture 1 coordinates map the inner shape to the
841  * unit square as well, but in a different coordinate system.
842  *
843  * When viewed in the texture coordinate systems the parallelograms
844  * we are filling are unit squares, but the pixels have then become
845  * tiny parallelograms themselves.  Both of the texture coordinate
846  * systems are affine transforms so the rate of change in X and Y
847  * of the texture coordinates are essentially constants and happen
848  * to correspond to the size and direction of the slanted sides of
849  * the distorted pixels relative to the "square mapped" boundary
850  * of the parallelograms.
851  *
852  * The shader uses the ddx() and ddy() functions to measure the "rate
853  * of change" of these texture coordinates and thus gets an accurate
854  * measure of the size and shape of a pixel relative to the two
855  * parallelograms.  It then uses the bounds of the size and shape
856  * of a pixel to intersect with the unit square to estimate the
857  * coverage of the pixel.  Unfortunately, without a lot more work
858  * to calculate the exact area of intersection between a unit
859  * square (the original parallelogram) and a parallelogram (the
860  * distorted pixel), this shader only approximates the pixel
 * coverage, but empirically the estimate is very useful and
 * produces visually pleasing results, even if not theoretically accurate.
863  */
/*
 * HLSL source of the antialiased parallelogram pixel shader, assembled
 * from adjacent string literals (no newlines are embedded in the result).
 * TEXCOORD0 (tco) addresses the outer shape and TEXCOORD1 (tci) the inner
 * shape; the incoming COLOR0 is scaled by the estimated coverage.
 */
static const char *aaShaderSource =
    "void main(in float2 tco : TEXCOORD0,"
    "          in float2 tci : TEXCOORD1,"
    "          inout float4 color : COLOR0)"
    "{"
    // Outer shape: the two "leg" vectors of the pixel parallelogram are
    // the screen-space derivatives of the outer texture coordinates.
    "    float2 oleg1 = ddx(tco);"
    "    float2 oleg2 = ddy(tco);"
    // Outer shape: bounding box of the pixel parallelogram, taken over
    // its four corners.
    "    float2 omin = min(tco, tco+oleg1);"
    "    omin = min(omin, tco+oleg2);"
    "    omin = min(omin, tco+oleg1+oleg2);"
    "    float2 omax = max(tco, tco+oleg1);"
    "    omax = max(omax, tco+oleg2);"
    "    omax = max(omax, tco+oleg1+oleg2);"
    // Inner shape: the same two leg vectors, derived from the inner
    // texture coordinates.
    "    float2 ileg1 = ddx(tci);"
    "    float2 ileg2 = ddy(tci);"
    // Inner shape: bounding box of the pixel parallelogram.
    "    float2 imin = min(tci, tci+ileg1);"
    "    imin = min(imin, tci+ileg2);"
    "    imin = min(imin, tci+ileg1+ileg2);"
    "    float2 imax = max(tci, tci+ileg1);"
    "    imax = max(imax, tci+ileg2);"
    "    imax = max(imax, tci+ileg1+ileg2);"
    // Clamp each bounding box to the unit square.  The clamped area
    // divided by the unclamped area serves as the estimate of how much
    // of the pixel lies inside the corresponding shape.
    "    float2 o1 = clamp(omin, 0.0, 1.0);"
    "    float2 o2 = clamp(omax, 0.0, 1.0);"
    "    float oint = (o2.y-o1.y)*(o2.x-o1.x);"
    "    float oarea = (omax.y-omin.y)*(omax.x-omin.x);"
    "    float2 i1 = clamp(imin, 0.0, 1.0);"
    "    float2 i2 = clamp(imax, 0.0, 1.0);"
    "    float iint = (i2.y-i1.y)*(i2.x-i1.x);"
    "    float iarea = (imax.y-imin.y)*(imax.x-imin.x);"
    // Coverage of the region between the shapes: the fraction of the
    // pixel inside the outer shape less the fraction inside the inner.
    "    float coverage = oint/oarea - iint / iarea;"
    "    color *= coverage;"
    "}";
909 
910 static void
D3DShaderGen_GenerateAAParallelogramShader()911 D3DShaderGen_GenerateAAParallelogramShader()
912 {
913     J2dTraceLn(J2D_TRACE_INFO, "D3DShaderGen_GenerateAAParallelogramShader");
914 
915     D3DShaderGen_WriteShader((char *)aaShaderSource, "ps_2_a", "aapgram", 0);
916 }
917 
918 /**************************** Main entrypoint *******************************/
919 
920 static void
D3DShaderGen_GenerateAllShaders()921 D3DShaderGen_GenerateAllShaders()
922 {
923     int i;
924 
925 #if 1
926     // Generate BufferedImageOp shaders
927     for (i = 0; i < MAX_RESCALE; i++) {
928         D3DShaderGen_GenerateRescaleShader(i);
929     }
930     D3DShaderGen_WriteShaderArray("rescale", MAX_RESCALE);
931     for (i = 0; i < MAX_CONVOLVE; i++) {
932         D3DShaderGen_GenerateConvolveShader(i);
933     }
934     D3DShaderGen_WriteShaderArray("convolve", MAX_CONVOLVE);
935     for (i = 0; i < MAX_LOOKUP; i++) {
936         D3DShaderGen_GenerateLookupShader(i);
937     }
938     D3DShaderGen_WriteShaderArray("lookup", MAX_LOOKUP);
939 
940     // Generate Paint shaders
941     for (i = 0; i < MAX_BASIC_GRAD; i++) {
942         D3DShaderGen_GenerateBasicGradShader(i);
943     }
944     D3DShaderGen_WriteShaderArray("grad", MAX_BASIC_GRAD);
945     for (i = 0; i < MAX_MULTI_GRAD; i++) {
946         if (EXTRACT_CYCLE_METHOD(i) == 3) continue; // REMIND
947         D3DShaderGen_GenerateLinearGradShader(i);
948     }
949     D3DShaderGen_WriteShaderArray("linear", MAX_MULTI_GRAD);
950     for (i = 0; i < MAX_MULTI_GRAD; i++) {
951         if (EXTRACT_CYCLE_METHOD(i) == 3) continue; // REMIND
952         D3DShaderGen_GenerateRadialGradShader(i);
953     }
954     D3DShaderGen_WriteShaderArray("radial", MAX_MULTI_GRAD);
955 
956     // Generate LCD text shader
957     D3DShaderGen_GenerateLCDTextShader();
958 
959     // Genereate Shader to fill Antialiased parallelograms
960     D3DShaderGen_GenerateAAParallelogramShader();
961 #else
962     /*
963     for (i = 0; i < MAX_RESCALE; i++) {
964         D3DShaderGen_GenerateRescaleShader(i);
965     }
966     D3DShaderGen_WriteShaderArray("rescale", MAX_RESCALE);
967     */
968     //D3DShaderGen_GenerateConvolveShader(2);
969     //D3DShaderGen_GenerateLCDTextShader();
970     //D3DShaderGen_GenerateLinearGradShader(16);
971     D3DShaderGen_GenerateBasicGradShader(0);
972 #endif
973 }
974 
975 int
main(int argc,char ** argv)976 main(int argc, char **argv)
977 {
978     fpHeader = fopen(strHeaderFile, "a");
979 
980     D3DShaderGen_GenerateAllShaders();
981 
982     fclose(fpHeader);
983 
984     return 0;
985 }
986