1 /*
2 * Copyright (c) 2007, 2008, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 /**
27 * This file contains a standalone program that is used to generate the
28 * D3DShaders.h file. The program invokes the fxc (D3D Shader Compiler)
29 * utility, which is part of the DirectX 9/10 SDK. Since most JDK
30 * developers (other than some Java 2D engineers) do not have the full DXSDK
31 * installed, and since we do not want to make the JDK build process
32 * dependent on the full DXSDK installation, we have chosen not to make
33 * this shader compilation step part of the build process. Instead, it is
34 * only necessary to compile and run this program when changes need to be
35 * made to the shader code contained within. Typically, this only happens
36 * on an as-needed basis by someone familiar with the D3D pipeline. Running
37 * this program is fairly straightforward:
38 *
39 * % rm D3DShaders.h
40 * % cl D3DShaderGen.c
41 * % D3DShaderGen.exe
42 *
43 * (And don't forget to putback the updated D3DShaders.h file!)
44 */
45
46 #include <stdio.h>
47 #include <process.h>
48 #include <Windows.h>
49
/* Output stream for the generated header; main() opens it before use. */
static FILE *fpHeader = NULL;
/* Name of the generated header file (points at a string literal). */
static const char *strHeaderFile = "D3DShaders.h";
52
/**
 * Tests flagbit against the variable named "flags" in the enclosing scope.
 * Yields 1 when at least one bit of flagbit is set there, 0 otherwise.
 */
#define IS_SET(flagbit) (0 != ((flags) & (flagbit)))
56
// REMIND: tracing is compiled out here; both macros expand to nothing, so
// their arguments are never evaluated.  The stderr-based variants are kept
// below for reference.
//#define J2dTraceLn(a, b) fprintf(stderr, "%s\n", b);
//#define J2dTraceLn1(a, b, c) fprintf(stderr, b, c);
#define J2dTraceLn(level, msg)
#define J2dTraceLn1(level, fmt, arg)
62
63 /************************* General shader support ***************************/
64
/**
 * Compiles one HLSL shader with fxc and appends the generated C array to
 * the header stream (fpHeader).
 *
 * The source text is written to "tmp.hlsl", fxc is launched to produce
 * "tmp.h" containing a variable named <name><flags>, and the contents of
 * "tmp.h" are then copied to fpHeader.  Any failure is reported on stderr
 * and nothing is appended for that shader.
 *
 * source  HLSL source code of the shader
 * target  fxc target profile passed to /T (e.g. "ps_2_0")
 * name    base name of the generated array variable
 * flags   flag combination; appended to name to form the variable name
 */
static void
D3DShaderGen_WriteShader(char *source, char *target, char *name, int flags)
{
    FILE *fpTmp;
    PROCESS_INFORMATION pi;
    STARTUPINFO si;
    DWORD code = 0;
    char pargs[300];
    int len;
    int ch;

    // write source to tmp.hlsl
    fpTmp = fopen("tmp.hlsl", "w");
    if (fpTmp == NULL) {
        fprintf(stderr, "cannot create tmp.hlsl for %s%d\n", name, flags);
        return;
    }
    fprintf(fpTmp, "%s\n", source);
    if (fclose(fpTmp) != 0) {
        fprintf(stderr, "cannot write tmp.hlsl for %s%d\n", name, flags);
        return;
    }

    // make sure a tmp.h left over from a previous shader can never be
    // mistaken for the output of this compilation (a failure here just
    // means there was no such file)
    remove("tmp.h");

    len = snprintf(pargs, sizeof(pargs),
                   "c:\\progra~1\\mi5889~1\\utilit~1\\bin\\x86\\fxc.exe "
                   "/T %s /Vn %s%d /Fh tmp.h tmp.hlsl",
                   // use the following format instead to generate debug
                   // info in the shader header file (may be useful
                   // for testing/debugging purposes, but it nearly
                   // doubles the size of the header file and compiled
                   // shader programs - off for production builds)
                   //"/Zi /T %s /Vn %s%d /Fh tmp.h tmp.hlsl",
                   target, name, flags);
    if (len < 0 || (size_t)len >= sizeof(pargs)) {
        fprintf(stderr, "fxc command line too long for %s%d\n", name, flags);
        return;
    }
    fprintf(stderr, "%s\n", pargs);

    ZeroMemory(&si, sizeof(si));
    ZeroMemory(&pi, sizeof(pi));
    si.cb = sizeof(si);
    // NOTE(review): STARTF_USESTDHANDLES is set while the hStd* fields stay
    // zeroed, so fxc presumably runs without usable standard handles --
    // confirm this is intended.
    si.dwFlags = STARTF_USESTDHANDLES;
    //si.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE);
    if (!CreateProcess(NULL, pargs, NULL, NULL, TRUE,
                       CREATE_NO_WINDOW, NULL, NULL, &si, &pi))
    {
        fprintf(stderr, "cannot launch fxc for %s%d (error %lu)\n",
                name, flags, (unsigned long)GetLastError());
        return;
    }

    // block until fxc exits rather than polling its exit code
    if (WaitForSingleObject(pi.hProcess, INFINITE) != WAIT_OBJECT_0 ||
        !GetExitCodeProcess(pi.hProcess, &code))
    {
        // could not obtain an exit code: treat as a failed compilation
        code = (DWORD)-1;
    }
    CloseHandle(pi.hThread);
    CloseHandle(pi.hProcess);

    if (code != 0) {
        fprintf(stderr, "fxc failed for %s%d\n", name, flags);
        return;
    }

    // append tmp.h to D3DShaders.h
    fpTmp = fopen("tmp.h", "r");
    if (fpTmp == NULL) {
        fprintf(stderr, "cannot open tmp.h for %s%d\n", name, flags);
        return;
    }
    while ((ch = fgetc(fpTmp)) != EOF) {
        fputc(ch, fpHeader);
    }
    fclose(fpTmp);
}
128
/**
 * Convenience wrapper: forwards to D3DShaderGen_WriteShader() with the
 * "ps_2_0" target profile.
 */
static void
D3DShaderGen_WritePixelShader(char *source, char *name, int flags)
{
    char *profile = "ps_2_0";

    D3DShaderGen_WriteShader(source, profile, name, flags);
}
134
/** Mask covering the two low-order flag bits that hold the CycleMethod. */
#define MULTI_GRAD_CYCLE_METHOD 0x3
/** Isolates the CycleMethod portion of the given flags value. */
#define EXTRACT_CYCLE_METHOD(flags) (MULTI_GRAD_CYCLE_METHOD & (flags))
139
140 static void
D3DShaderGen_WriteShaderArray(char * name,int num)141 D3DShaderGen_WriteShaderArray(char *name, int num)
142 {
143 char array[5000];
144 char elem[30];
145 int i;
146
147 sprintf(array, "const DWORD *%sShaders[] =\n{\n", name);
148 for (i = 0; i < num; i++) {
149 if (num == 32 && EXTRACT_CYCLE_METHOD(i) == 3) {
150 // REMIND: what a hack!
151 sprintf(elem, " NULL,\n");
152 } else {
153 sprintf(elem, " %s%d,\n", name, i);
154 }
155 strcat(array, elem);
156 }
157 strcat(array, "};\n");
158
159 // append to D3DShaders.h
160 fprintf(fpHeader, "%s\n", array);
161 }
162
163 /**************************** ConvolveOp support ****************************/
164
/*
 * printf-style template for the ConvolveOp pixel shader.  Conversions, in
 * order: kernel size (%d), edge color expression (%s), kernel size (%d).
 */
static const char *convolveShaderSource =
    // source image that the kernel is applied to
    "sampler2D baseImage : register(s0);"
    // edge limits of the image:
    //   imgEdge.xy = imgMin.xy (anything below is an edge case)
    //   imgEdge.zw = imgMax.xy (anything above is an edge case)
    "float4 imgEdge : register(c0);"
    // one entry per kernel location:
    //   kernelVals[i].x = offsetX[i]
    //   kernelVals[i].y = offsetY[i]
    //   kernelVals[i].z = kernel[i]
    "float3 kernelVals[%d] : register(c1);"
    "void main(in float2 tc : TEXCOORD0,"
    " inout float4 color : COLOR0)"
    "{"
    " float4 sum = imgEdge - tc.xyxy;"
    " if (sum.x > 0 || sum.y > 0 || sum.z < 0 || sum.w < 0) {"
    // (edge condition code is substituted here)
    " color = %s;"
    " } else {"
    " int i;"
    " sum = float4(0, 0, 0, 0);"
    " for (i = 0; i < %d; i++) {"
    " sum +="
    " kernelVals[i].z *"
    " tex2D(baseImage, tc + kernelVals[i].xy);"
    " }"
    // multiply into the incoming color so that extra alpha is applied
    " color *= sum;"
    " }"
    "}";

/**
 * Bit flags selecting the ConvolveOp shader variant; MAX_CONVOLVE is the
 * number of flag combinations.
 */
#define CONVOLVE_EDGE_ZERO_FILL 0x1
#define CONVOLVE_5X5            0x2
#define MAX_CONVOLVE            0x4
207
208 static void
D3DShaderGen_GenerateConvolveShader(int flags)209 D3DShaderGen_GenerateConvolveShader(int flags)
210 {
211 int kernelMax = IS_SET(CONVOLVE_5X5) ? 25 : 9;
212 char *edge;
213 char finalSource[2000];
214
215 J2dTraceLn1(J2D_TRACE_INFO,
216 "D3DShaderGen_GenerateConvolveShader: flags=%d",
217 flags);
218
219 if (IS_SET(CONVOLVE_EDGE_ZERO_FILL)) {
220 // EDGE_ZERO_FILL: fill in zero at the edges
221 edge = "float4(0, 0, 0, 0)";
222 } else {
223 // EDGE_NO_OP: use the source pixel color at the edges
224 edge = "tex2D(baseImage, tc)";
225 }
226
227 // compose the final source code string from the various pieces
228 sprintf(finalSource, convolveShaderSource,
229 kernelMax, edge, kernelMax);
230
231 D3DShaderGen_WritePixelShader(finalSource, "convolve", flags);
232 }
233
234 /**************************** RescaleOp support *****************************/
235
/*
 * printf-style template for the RescaleOp pixel shader.  Conversions, in
 * order: un-premultiply code (%s), re-premultiply code (%s).
 */
static const char *rescaleShaderSource =
    // source image
    "sampler2D baseImage : register(s0);"
    // per-component scale factors
    "float4 scaleFactors : register(c0);"
    // per-component offsets
    "float4 offsets : register(c1);"
    "void main(in float2 tc : TEXCOORD0,"
    " inout float4 color : COLOR0)"
    "{"
    " float4 srcColor = tex2D(baseImage, tc);"
    // (un-premult code is substituted here)
    " %s"
    // apply the rescale
    " float4 result = (srcColor * scaleFactors) + offsets;"
    // (re-premult code is substituted here)
    " %s"
    // multiply into the incoming color so that extra alpha is applied
    " color *= result;"
    "}";

/**
 * Bit flags selecting the RescaleOp shader variant; MAX_RESCALE is the
 * number of flag combinations.
 */
#define RESCALE_NON_PREMULT 0x1
#define MAX_RESCALE         0x2
268
269 static void
D3DShaderGen_GenerateRescaleShader(int flags)270 D3DShaderGen_GenerateRescaleShader(int flags)
271 {
272 char *preRescale = "";
273 char *postRescale = "";
274 char finalSource[2000];
275
276 J2dTraceLn1(J2D_TRACE_INFO,
277 "D3DShaderGen_GenerateRescaleShader: flags=%d",
278 flags);
279
280 if (IS_SET(RESCALE_NON_PREMULT)) {
281 preRescale = "srcColor.rgb /= srcColor.a;";
282 postRescale = "result.rgb *= result.a;";
283 }
284
285 // compose the final source code string from the various pieces
286 sprintf(finalSource, rescaleShaderSource,
287 preRescale, postRescale);
288
289 D3DShaderGen_WritePixelShader(finalSource, "rescale", flags);
290 }
291
292 /**************************** LookupOp support ******************************/
293
/*
 * printf-style template for the LookupOp pixel shader.  Conversions, in
 * order: un-premultiply code (%s), alpha store code (%s), re-premultiply
 * code (%s).
 */
static const char *lookupShaderSource =
    // source image (texture unit 0)
    "sampler2D baseImage : register(s0);"
    // lookup table (texture unit 1)
    "sampler2D lookupTable : register(s1);"
    // offset that is subtracted from the source index before the lookup
    "float4 offset : register(c0);"
    "void main(in float2 tc : TEXCOORD0,"
    " inout float4 color : COLOR0)"
    "{"
    " float4 srcColor = tex2D(baseImage, tc);"
    // (un-premult code is substituted here)
    " %s"
    // remove the offset from the original index
    " float4 srcIndex = srcColor - offset;"
    // feed the source value into the lookup table (the "v" texcoords
    // are hardcoded to hit the texel centers of each row/band in the
    // texture)
    " float4 result;"
    " result.r = tex2D(lookupTable, float2(srcIndex.r, 0.125)).r;"
    " result.g = tex2D(lookupTable, float2(srcIndex.g, 0.375)).r;"
    " result.b = tex2D(lookupTable, float2(srcIndex.b, 0.625)).r;"
    // (alpha store code is substituted here)
    " %s"
    // (re-premult code is substituted here)
    " %s"
    // multiply into the incoming color so that extra alpha is applied
    " color *= result;"
    "}";

/**
 * Bit flags selecting the LookupOp shader variant; MAX_LOOKUP is the
 * number of flag combinations.
 */
#define LOOKUP_USE_SRC_ALPHA 0x1
#define LOOKUP_NON_PREMULT   0x2
#define MAX_LOOKUP           0x4
332
333 static void
D3DShaderGen_GenerateLookupShader(int flags)334 D3DShaderGen_GenerateLookupShader(int flags)
335 {
336 char *alpha;
337 char *preLookup = "";
338 char *postLookup = "";
339 char finalSource[2000];
340
341 J2dTraceLn1(J2D_TRACE_INFO,
342 "D3DShaderGen_GenerateLookupShader: flags=%d",
343 flags);
344
345 if (IS_SET(LOOKUP_USE_SRC_ALPHA)) {
346 // when numComps is 1 or 3, the alpha is not looked up in the table;
347 // just keep the alpha from the source fragment
348 alpha = "result.a = srcColor.a;";
349 } else {
350 // when numComps is 4, the alpha is looked up in the table, just
351 // like the other color components from the source fragment
352 alpha = "result.a = tex2D(lookupTable, float2(srcIndex.a, 0.875)).r;";
353 }
354 if (IS_SET(LOOKUP_NON_PREMULT)) {
355 preLookup = "srcColor.rgb /= srcColor.a;";
356 postLookup = "result.rgb *= result.a;";
357 }
358
359 // compose the final source code string from the various pieces
360 sprintf(finalSource, lookupShaderSource,
361 preLookup, alpha, postLookup);
362
363 D3DShaderGen_WritePixelShader(finalSource, "lookup", flags);
364 }
365
366 /************************* GradientPaint support ****************************/
367
/*
 * printf-style template for the basic GradientPaint pixel shader.
 * Conversions, in order: mask variable (%s), mask texcoord input (%s),
 * TEXCOORD index of winCoord (%d), cycle code (%s), mask modulation
 * code (%s).
 *
 * Several scalar (float) values are packed into one float3 so that they
 * can be uploaded together:
 *
 * params.x = p0
 * params.y = p1
 * params.z = p3
 */
static const char *basicGradientShaderSource =
    "float3 params : register (c0);"
    "float4 color1 : register (c1);"
    "float4 color2 : register (c2);"
    // (mask variable is substituted here)
    "%s"
    // (mask texcoord input is substituted here)
    "void main(%s"
    " in float4 winCoord : TEXCOORD%d,"
    " inout float4 color : COLOR0)"
    "{"
    " float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);"
    " float dist = dot(params.xyz, fragCoord);"
    // the p0/p1/p3 setup code translates/scales to hit texel centers
    // (at 0.25 and 0.75) because the original/fast texture-based
    // implementation needs that; this shader-based implementation
    // does not want it, so the value is transformed back here...
    " dist = (dist - 0.25) * 2.0;"
    " float fraction;"
    // (cycle code is substituted here)
    " %s"
    " float4 result = lerp(color1, color2, fraction);"
    // (mask modulation code is substituted here)
    " %s"
    // multiply into the incoming color so that extra alpha is applied
    " color *= result;"
    "}";

/**
 * Bit flags selecting the basic gradient shader variant; MAX_BASIC_GRAD is
 * the number of flag combinations.
 */
#define BASIC_GRAD_IS_CYCLIC 0x1
#define BASIC_GRAD_USE_MASK  0x2
#define MAX_BASIC_GRAD       0x4
419
420 static void
D3DShaderGen_GenerateBasicGradShader(int flags)421 D3DShaderGen_GenerateBasicGradShader(int flags)
422 {
423 int colorSampler = IS_SET(BASIC_GRAD_USE_MASK) ? 1 : 0;
424 char *cycleCode;
425 char *maskVars = "";
426 char *maskInput = "";
427 char *maskCode = "";
428 char finalSource[3000];
429
430 J2dTraceLn1(J2D_TRACE_INFO,
431 "D3DShaderGen_GenerateBasicGradShader",
432 flags);
433
434 if (IS_SET(BASIC_GRAD_IS_CYCLIC)) {
435 cycleCode =
436 "fraction = 1.0 - (abs(frac(dist * 0.5) - 0.5) * 2.0);";
437 } else {
438 cycleCode =
439 "fraction = clamp(dist, 0.0, 1.0);";
440 }
441
442 if (IS_SET(BASIC_GRAD_USE_MASK)) {
443 /*
444 * This code modulates the calculated result color with the
445 * corresponding alpha value from the alpha mask texture active
446 * on texture unit 0. Only needed when useMask is true (i.e., only
447 * for MaskFill operations).
448 */
449 maskVars = "sampler2D mask : register(s0);";
450 maskInput = "in float4 maskCoord : TEXCOORD0,";
451 maskCode = "result *= tex2D(mask, maskCoord.xy).a;";
452 }
453
454 // compose the final source code string from the various pieces
455 sprintf(finalSource, basicGradientShaderSource,
456 maskVars, maskInput, colorSampler, cycleCode, maskCode);
457
458 D3DShaderGen_WritePixelShader(finalSource, "grad", flags);
459 }
460
461 /****************** Shared MultipleGradientPaint support ********************/
462
/**
 * Copies of the MultipleGradientPaint.CycleMethod enum values, duplicated
 * here for convenience (pulling them from the Java level would be more
 * hassle than it is worth).
 */
#define CYCLE_NONE    0
#define CYCLE_REFLECT 1
#define CYCLE_REPEAT  2

/**
 * Bit flags controlling how the MultipleGradientPaint shader source code is
 * generated:
 *
 * MULTI_GRAD_CYCLE_METHOD
 * Placeholder for the CycleMethod enum constant.  This mask and the
 * EXTRACT_CYCLE_METHOD() accessor are defined earlier in this file
 * because D3DShaderGen_WriteShaderArray() needs them.
 *
 * MULTI_GRAD_LARGE
 * If set, use the (slower) shader that supports a larger number of
 * gradient colors; otherwise, use the optimized codepath.  See the
 * MAX_FRACTIONS_SMALL/LARGE constants below.
 *
 * MULTI_GRAD_USE_MASK
 * If set, apply the alpha mask value from texture unit 0 to the final
 * color result (only used in the MaskFill case).
 *
 * MULTI_GRAD_LINEAR_RGB
 * If set, convert the linear RGB result back into the sRGB color space.
 */
#define MULTI_GRAD_LARGE      0x04
#define MULTI_GRAD_USE_MASK   0x08
#define MULTI_GRAD_LINEAR_RGB 0x10

// REMIND
/** Number of multi-gradient flag combinations. */
#define MAX_MULTI_GRAD        0x20

/**
 * Maximum number of gradient "stops" supported by the fragment shader and
 * related code: MAX_FRACTIONS_LARGE when MULTI_GRAD_LARGE is set,
 * MAX_FRACTIONS_SMALL otherwise.  Two values allow one highly optimized
 * shader (SMALL) for a few fractions/colors plus a less optimal shader
 * that supports more stops.
 */
#define MAX_FRACTIONS       8
#define MAX_FRACTIONS_LARGE MAX_FRACTIONS
#define MAX_FRACTIONS_SMALL 4

/**
 * Maximum number of gradient colors supported by all of the gradient
 * fragment shaders; substituted for TEXTURE_SIZE in the multi-gradient
 * shader source.  Per the original note it must be a power of two and
 * greater than or equal to MAX_FRACTIONS (the two need not be equal).
 */
#define MAX_COLORS 16
524
/*
 * printf-style template shared by the linear and radial multi-stop
 * gradient pixel shaders.  Conversions, in order: texture size (%d),
 * maximum fractions (%d), sampler register of the color texture (%d),
 * mask variable (%s), paint-specific variables (%s), mask texcoord input
 * (%s), TEXCOORD index of winCoord (%d), paint-specific distance code (%s),
 * CycleMethod code (%s), ColorSpace conversion code (%s), mask modulation
 * code (%s).
 */
static const char *multiGradientShaderSource =
    // size of the gradient texture, in texels
    "#define TEXTURE_SIZE %d\n"
    // number of fractions/colors this shader can handle
    "#define MAX_FRACTIONS %d\n"
    // width of one texel
    "#define FULL_TEXEL (1.0 / float(TEXTURE_SIZE))\n"
    // width of half a texel
    "#define HALF_TEXEL (FULL_TEXEL / 2.0)\n"
    // texture holding the gradient colors
    "sampler2D colors : register (s%d);"
    // gradient stops/fractions and their scale factors:
    //   fractions[i].x = gradientStop[i]
    //   fractions[i].y = scaleFactor[i]
    "float2 fractions[MAX_FRACTIONS] : register (c0);"
    // (mask variable is substituted here)
    "%s"
    // (Linear/RadialGP-specific variables are substituted here)
    "%s"
    // (mask texcoord input is substituted here)
    "void main(%s"
    " in float4 winCoord : TEXCOORD%d,"
    " inout float4 color : COLOR0)"
    "{"
    " float dist;"
    // (Linear/RadialGradientPaint-specific code is substituted here)
    " %s"
    " float4 result;"
    // (CycleMethod-specific code is substituted here)
    " %s"
    // (ColorSpace conversion code is substituted here)
    " %s"
    // (mask modulation code is substituted here)
    " %s"
    // multiply into the incoming color so that extra alpha is applied
    " color *= result;"
    "}";
567
/*
 * Shader code that turns "dist" into the final interpolated gradient color.
 *
 * Note: an earlier version computed a single texcoord
 *     "tc = HALF_TEXEL + (FULL_TEXEL * relFraction);"
 * and did one lookup
 *     "float4 result = tex2D(colors, float2(tc, 0.5));"
 * relying on the LINEAR texture filtering mode to interpolate between
 * adjacent texels.
 *
 * On certain hardware (from ATI, notably) the LINEAR texture fetch unit has
 * low precision and would, for instance, produce only 64 distinct
 * grayscales between white and black instead of the expected 256.  The
 * resulting banding is severe enough to likely cause complaints from
 * developers, so the code below fetches the two relevant neighboring
 * texels itself and interpolates with lerp(), which does not suffer from
 * the precision issues of the fixed-function filtering unit.  This costs a
 * few more instructions and is slightly slower than the old approach (not
 * more than 10% or so).
 */
static const char *texCoordCalcCode =
    "int i;"
    "float relFraction = 0.0;"
    "for (i = 0; i < MAX_FRACTIONS-1; i++) {"
    " relFraction +="
    " clamp((dist - fractions[i].x) * fractions[i].y, 0.0, 1.0);"
    "}"
    // offset by half a texel so that the interpolation happens between
    // the two texel centers of interest
    "float intPart = floor(relFraction);"
    "float tc1 = HALF_TEXEL + (FULL_TEXEL * intPart);"
    "float tc2 = HALF_TEXEL + (FULL_TEXEL * (intPart + 1.0));"
    "float4 clr1 = tex2D(colors, float2(tc1, 0.5));"
    "float4 clr2 = tex2D(colors, float2(tc2, 0.5));"
    "result = lerp(clr1, clr2, frac(relFraction));";
604
/**
 * NO_CYCLE variant of the CycleMethod code; the single %s receives the
 * texcoord calculation.
 */
static const char *noCycleCode =
    "if (dist <= 0.0) {"
    " result = tex2D(colors, float2(0.0, 0.5));"
    "} else if (dist >= 1.0) {"
    " result = tex2D(colors, float2(1.0, 0.5));"
    "} else {"
    // (texcoord calculation is substituted here)
    " %s"
    "}";

/**
 * REFLECT variant of the CycleMethod code; the single %s receives the
 * texcoord calculation.
 */
static const char *reflectCode =
    "dist = 1.0 - (abs(frac(dist * 0.5) - 0.5) * 2.0);"
    // (texcoord calculation is substituted here)
    "%s";

/**
 * REPEAT variant of the CycleMethod code; the single %s receives the
 * texcoord calculation.
 */
static const char *repeatCode =
    "dist = frac(dist);"
    // (texcoord calculation is substituted here)
    "%s";
627
628 static void
D3DShaderGen_GenerateMultiGradShader(int flags,char * name,char * paintVars,char * distCode)629 D3DShaderGen_GenerateMultiGradShader(int flags, char *name,
630 char *paintVars, char *distCode)
631 {
632 char *maskVars = "";
633 char *maskInput = "";
634 char *maskCode = "";
635 char *colorSpaceCode = "";
636 char cycleCode[1500];
637 char finalSource[3000];
638 int colorSampler = IS_SET(MULTI_GRAD_USE_MASK) ? 1 : 0;
639 int cycleMethod = EXTRACT_CYCLE_METHOD(flags);
640 int maxFractions = IS_SET(MULTI_GRAD_LARGE) ?
641 MAX_FRACTIONS_LARGE : MAX_FRACTIONS_SMALL;
642
643 J2dTraceLn(J2D_TRACE_INFO, "OGLPaints_CreateMultiGradProgram");
644
645 if (IS_SET(MULTI_GRAD_USE_MASK)) {
646 /*
647 * This code modulates the calculated result color with the
648 * corresponding alpha value from the alpha mask texture active
649 * on texture unit 0. Only needed when useMask is true (i.e., only
650 * for MaskFill operations).
651 */
652 maskVars = "sampler2D mask : register(s0);";
653 maskInput = "in float4 maskCoord : TEXCOORD0,";
654 maskCode = "result *= tex2D(mask, maskCoord.xy).a;";
655 }
656
657 if (IS_SET(MULTI_GRAD_LINEAR_RGB)) {
658 /*
659 * This code converts a single pixel in linear RGB space back
660 * into sRGB (note: this code was adapted from the
661 * MultipleGradientPaintContext.convertLinearRGBtoSRGB() method).
662 */
663 colorSpaceCode =
664 "result.rgb = 1.055 * pow(result.rgb, 0.416667) - 0.055;";
665 }
666
667 if (cycleMethod == CYCLE_NONE) {
668 sprintf(cycleCode, noCycleCode, texCoordCalcCode);
669 } else if (cycleMethod == CYCLE_REFLECT) {
670 sprintf(cycleCode, reflectCode, texCoordCalcCode);
671 } else { // (cycleMethod == CYCLE_REPEAT)
672 sprintf(cycleCode, repeatCode, texCoordCalcCode);
673 }
674
675 // compose the final source code string from the various pieces
676 sprintf(finalSource, multiGradientShaderSource,
677 MAX_COLORS, maxFractions, colorSampler,
678 maskVars, paintVars, maskInput, colorSampler,
679 distCode, cycleCode, colorSpaceCode, maskCode);
680
681 D3DShaderGen_WritePixelShader(finalSource, name, flags);
682 }
683
684 /********************** LinearGradientPaint support *************************/
685
686 static void
D3DShaderGen_GenerateLinearGradShader(int flags)687 D3DShaderGen_GenerateLinearGradShader(int flags)
688 {
689 char *paintVars;
690 char *distCode;
691
692 J2dTraceLn1(J2D_TRACE_INFO,
693 "D3DShaderGen_GenerateLinearGradShader",
694 flags);
695
696 /*
697 * To simplify the code and to make it easier to upload a number of
698 * uniform values at once, we pack a bunch of scalar (float) values
699 * into a single float3 below. Here's how the values are related:
700 *
701 * params.x = p0
702 * params.y = p1
703 * params.z = p3
704 */
705 paintVars =
706 "float3 params : register(c16);";
707 distCode =
708 "float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);"
709 "dist = dot(params.xyz, fragCoord);";
710
711 D3DShaderGen_GenerateMultiGradShader(flags, "linear",
712 paintVars, distCode);
713 }
714
715 /********************** RadialGradientPaint support *************************/
716
717 static void
D3DShaderGen_GenerateRadialGradShader(int flags)718 D3DShaderGen_GenerateRadialGradShader(int flags)
719 {
720 char *paintVars;
721 char *distCode;
722
723 J2dTraceLn1(J2D_TRACE_INFO,
724 "D3DShaderGen_GenerateRadialGradShader",
725 flags);
726
727 /*
728 * To simplify the code and to make it easier to upload a number of
729 * uniform values at once, we pack a bunch of scalar (float) values
730 * into float3 values below. Here's how the values are related:
731 *
732 * m0.x = m00
733 * m0.y = m01
734 * m0.z = m02
735 *
736 * m1.x = m10
737 * m1.y = m11
738 * m1.z = m12
739 *
740 * precalc.x = focusX
741 * precalc.y = 1.0 - (focusX * focusX)
742 * precalc.z = 1.0 / precalc.z
743 */
744 paintVars =
745 "float3 m0 : register(c16);"
746 "float3 m1 : register(c17);"
747 "float3 precalc : register(c18);";
748
749 /*
750 * The following code is derived from Daniel Rice's whitepaper on
751 * radial gradient performance (attached to the bug report for 6521533).
752 * Refer to that document as well as the setup code in the Java-level
753 * BufferedPaints.setRadialGradientPaint() method for more details.
754 */
755 distCode =
756 "float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);"
757 "float x = dot(fragCoord, m0);"
758 "float y = dot(fragCoord, m1);"
759 "float xfx = x - precalc.x;"
760 "dist = (precalc.x*xfx + sqrt(xfx*xfx + y*y*precalc.y))*precalc.z;";
761
762 D3DShaderGen_GenerateMultiGradShader(flags, "radial",
763 paintVars, distCode);
764 }
765
766 /*************************** LCD text support *******************************/
767
// REMIND: with the lookup-texture variant the shader uses texture
//         addressing operations in a dependency chain that is too complex
//         for the target shader model (ps_2_0) to handle (ugh, I guess we
//         can either require ps_3_0 or just use the slower pow intrinsic)
#define POW_LUT 0

/*
 * Source of the LCD text pixel shader.  POW_LUT selects, at compile time of
 * this tool, between 3D lookup textures and the pow() intrinsic for the
 * gamma adjustments.
 */
static const char *lcdTextShaderSource =
    "float3 srcAdj : register(c0);"
    "sampler2D glyphTex : register(s0);"
    "sampler2D dstTex : register(s1);"
#if POW_LUT
    "sampler3D invgammaTex : register(s2);"
    "sampler3D gammaTex : register(s3);"
#else
    "float3 invgamma : register(c1);"
    "float3 gamma : register(c2);"
#endif
    "void main(in float2 tc0 : TEXCOORD0,"
    " in float2 tc1 : TEXCOORD1,"
    " inout float4 color : COLOR0)"
    "{"
    // fetch the RGB value of the glyph image at the current texcoord
    " float3 glyphClr = tex2D(glyphTex, tc0).rgb;"
    " if (!any(glyphClr)) {"
    // no coverage at all: skip this fragment
    " discard;"
    " }"
    // fetch the RGB value of the corresponding destination pixel
    " float3 dstClr = tex2D(dstTex, tc1).rgb;"
    // gamma adjust the destination color (invgamma)
#if POW_LUT
    " float3 dstAdj = tex3D(invgammaTex, dstClr).rgb;"
#else
    " float3 dstAdj = pow(dstClr, invgamma);"
#endif
    // interpolate linearly between the three color values
    " float3 result = lerp(dstAdj, srcAdj, glyphClr);"
    // gamma re-adjust the result (alpha is always set to 1.0)
#if POW_LUT
    " color = float4(tex3D(gammaTex, result).rgb, 1.0);"
#else
    " color = float4(pow(result, gamma), 1.0);"
#endif
    "}";
813
814 static void
D3DShaderGen_GenerateLCDTextShader()815 D3DShaderGen_GenerateLCDTextShader()
816 {
817 J2dTraceLn(J2D_TRACE_INFO, "D3DShaderGen_GenerateLCDTextShader");
818
819 D3DShaderGen_WritePixelShader((char *)lcdTextShaderSource, "lcdtext", 0);
820 }
821
822 /*************************** AA support *******************************/
823
824 /*
825 * This shader fills the space between an outer and inner parallelogram.
826 * It can be used to draw an outline by specifying both inner and outer
827 * values. It fills pixels by estimating what portion falls inside the
828 * outer shape, and subtracting an estimate of what portion falls inside
829 * the inner shape. Specifying both inner and outer values produces a
830 * standard "wide outline". Specifying an inner shape that falls far
831 * outside the outer shape allows the same shader to fill the outer
832 * shape entirely since pixels that fall within the outer shape are never
833 * inside the inner shape and so they are filled based solely on their
834 * coverage of the outer shape.
835 *
836 * The setup code renders this shader over the bounds of the outer
837 * shape (or the only shape in the case of a fill operation) and
838 * sets the texture 0 coordinates so that 0,0=>0,1=>1,1=>1,0 in those
839 * texture coordinates map to the four corners of the parallelogram.
840 * Similarly the texture 1 coordinates map the inner shape to the
841 * unit square as well, but in a different coordinate system.
842 *
843 * When viewed in the texture coordinate systems the parallelograms
844 * we are filling are unit squares, but the pixels have then become
845 * tiny parallelograms themselves. Both of the texture coordinate
846 * systems are affine transforms so the rate of change in X and Y
847 * of the texture coordinates are essentially constants and happen
848 * to correspond to the size and direction of the slanted sides of
849 * the distorted pixels relative to the "square mapped" boundary
850 * of the parallelograms.
851 *
852 * The shader uses the ddx() and ddy() functions to measure the "rate
853 * of change" of these texture coordinates and thus gets an accurate
854 * measure of the size and shape of a pixel relative to the two
855 * parallelograms. It then uses the bounds of the size and shape
856 * of a pixel to intersect with the unit square to estimate the
857 * coverage of the pixel. Unfortunately, without a lot more work
858 * to calculate the exact area of intersection between a unit
859 * square (the original parallelogram) and a parallelogram (the
860 * distorted pixel), this shader only approximates the pixel
861 * coverage, but empirically the estimate is very useful and
862 * produces visually pleasing results, if not theoretically accurate.
863 */
/* Source of the antialiased parallelogram pixel shader. */
static const char *aaShaderSource =
    "void main(in float2 tco : TEXCOORD0,"
    " in float2 tci : TEXCOORD1,"
    " inout float4 color : COLOR0)"
    "{"
    // "leg" vectors of the pixel parallelogram, in the coordinate
    // system of the outer parallelogram
    " float2 oleg1 = ddx(tco);"
    " float2 oleg2 = ddy(tco);"
    // bounds of the distorted pixel parallelogram
    " float2 omin = min(tco, tco+oleg1);"
    " omin = min(omin, tco+oleg2);"
    " omin = min(omin, tco+oleg1+oleg2);"
    " float2 omax = max(tco, tco+oleg1);"
    " omax = max(omax, tco+oleg2);"
    " omax = max(omax, tco+oleg1+oleg2);"
    // "leg" vectors of the pixel parallelogram, in the coordinate
    // system of the inner parallelogram
    " float2 ileg1 = ddx(tci);"
    " float2 ileg2 = ddy(tci);"
    // bounds of the distorted pixel parallelogram
    " float2 imin = min(tci, tci+ileg1);"
    " imin = min(imin, tci+ileg2);"
    " imin = min(imin, tci+ileg1+ileg2);"
    " float2 imax = max(tci, tci+ileg1);"
    " imax = max(imax, tci+ileg2);"
    " imax = max(imax, tci+ileg1+ileg2);"
    // clamp the bounds to the unit square to estimate the intersection
    // of the pixel parallelogram with the unit square; the ratio of
    // the two rectangle areas is a reasonable estimate of the
    // proportion of coverage
    " float2 o1 = clamp(omin, 0.0, 1.0);"
    " float2 o2 = clamp(omax, 0.0, 1.0);"
    " float oint = (o2.y-o1.y)*(o2.x-o1.x);"
    " float oarea = (omax.y-omin.y)*(omax.x-omin.x);"
    " float2 i1 = clamp(imin, 0.0, 1.0);"
    " float2 i2 = clamp(imax, 0.0, 1.0);"
    " float iint = (i2.y-i1.y)*(i2.x-i1.x);"
    " float iarea = (imax.y-imin.y)*(imax.x-imin.x);"
    // coverage of the area between the two shapes: proportion of the
    // pixel inside the outer shape minus the proportion inside the
    // inner shape
    " float coverage = oint/oarea - iint / iarea;"
    " color *= coverage;"
    "}";
909
910 static void
D3DShaderGen_GenerateAAParallelogramShader()911 D3DShaderGen_GenerateAAParallelogramShader()
912 {
913 J2dTraceLn(J2D_TRACE_INFO, "D3DShaderGen_GenerateAAParallelogramShader");
914
915 D3DShaderGen_WriteShader((char *)aaShaderSource, "ps_2_a", "aapgram", 0);
916 }
917
918 /**************************** Main entrypoint *******************************/
919
920 static void
D3DShaderGen_GenerateAllShaders()921 D3DShaderGen_GenerateAllShaders()
922 {
923 int i;
924
925 #if 1
926 // Generate BufferedImageOp shaders
927 for (i = 0; i < MAX_RESCALE; i++) {
928 D3DShaderGen_GenerateRescaleShader(i);
929 }
930 D3DShaderGen_WriteShaderArray("rescale", MAX_RESCALE);
931 for (i = 0; i < MAX_CONVOLVE; i++) {
932 D3DShaderGen_GenerateConvolveShader(i);
933 }
934 D3DShaderGen_WriteShaderArray("convolve", MAX_CONVOLVE);
935 for (i = 0; i < MAX_LOOKUP; i++) {
936 D3DShaderGen_GenerateLookupShader(i);
937 }
938 D3DShaderGen_WriteShaderArray("lookup", MAX_LOOKUP);
939
940 // Generate Paint shaders
941 for (i = 0; i < MAX_BASIC_GRAD; i++) {
942 D3DShaderGen_GenerateBasicGradShader(i);
943 }
944 D3DShaderGen_WriteShaderArray("grad", MAX_BASIC_GRAD);
945 for (i = 0; i < MAX_MULTI_GRAD; i++) {
946 if (EXTRACT_CYCLE_METHOD(i) == 3) continue; // REMIND
947 D3DShaderGen_GenerateLinearGradShader(i);
948 }
949 D3DShaderGen_WriteShaderArray("linear", MAX_MULTI_GRAD);
950 for (i = 0; i < MAX_MULTI_GRAD; i++) {
951 if (EXTRACT_CYCLE_METHOD(i) == 3) continue; // REMIND
952 D3DShaderGen_GenerateRadialGradShader(i);
953 }
954 D3DShaderGen_WriteShaderArray("radial", MAX_MULTI_GRAD);
955
956 // Generate LCD text shader
957 D3DShaderGen_GenerateLCDTextShader();
958
959 // Genereate Shader to fill Antialiased parallelograms
960 D3DShaderGen_GenerateAAParallelogramShader();
961 #else
962 /*
963 for (i = 0; i < MAX_RESCALE; i++) {
964 D3DShaderGen_GenerateRescaleShader(i);
965 }
966 D3DShaderGen_WriteShaderArray("rescale", MAX_RESCALE);
967 */
968 //D3DShaderGen_GenerateConvolveShader(2);
969 //D3DShaderGen_GenerateLCDTextShader();
970 //D3DShaderGen_GenerateLinearGradShader(16);
971 D3DShaderGen_GenerateBasicGradShader(0);
972 #endif
973 }
974
975 int
main(int argc,char ** argv)976 main(int argc, char **argv)
977 {
978 fpHeader = fopen(strHeaderFile, "a");
979
980 D3DShaderGen_GenerateAllShaders();
981
982 fclose(fpHeader);
983
984 return 0;
985 }
986