1 /*
2  * Pixel and vertex shaders implementation using ARB_vertex_program
3  * and ARB_fragment_program GL extensions.
4  *
5  * Copyright 2002-2003 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006 Ivan Gyurdiev
10  * Copyright 2006 Jason Green
11  * Copyright 2006 Henri Verbeet
12  * Copyright 2007-2011, 2013-2014 Stefan Dösinger for CodeWeavers
13  * Copyright 2009 Henri Verbeet for CodeWeavers
14  *
15  * This library is free software; you can redistribute it and/or
16  * modify it under the terms of the GNU Lesser General Public
17  * License as published by the Free Software Foundation; either
18  * version 2.1 of the License, or (at your option) any later version.
19  *
20  * This library is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23  * Lesser General Public License for more details.
24  *
25  * You should have received a copy of the GNU Lesser General Public
26  * License along with this library; if not, write to the Free Software
27  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
28  */
29 
30 #include "wined3d_private.h"
31 
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
33 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35 WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
36 
37 static BOOL shader_is_pshader_version(enum wined3d_shader_type type)
38 {
39     return type == WINED3D_SHADER_TYPE_PIXEL;
40 }
41 
42 static BOOL shader_is_vshader_version(enum wined3d_shader_type type)
43 {
44     return type == WINED3D_SHADER_TYPE_VERTEX;
45 }
46 
/* Line iterator over a NUL-terminated text buffer.
 *
 * Returns a pointer to the start of the line *ptr currently points at and
 * advances *ptr to the character following that line's '\n' (or to the
 * terminating NUL when the last line has no trailing newline). The buffer is
 * not modified, so the returned line is not NUL-terminated at its newline.
 *
 * Returns NULL, leaving *ptr untouched, once *ptr points at the terminating
 * NUL. */
static const char *get_line(const char **ptr)
{
    const char *line = *ptr;
    const char *eol = line + strcspn(line, "\n");

    if (*eol == '\n')
    {
        *ptr = eol + 1;
        return line;
    }

    /* No newline left: this is either a final unterminated line or the end
     * of the text. */
    if (eol == line)
        return NULL;

    *ptr = eol;
    return line;
}
62 
/* Symbolic names for the helper values that arb_get_helper_value() turns
 * into an operand string. The literal noted next to each entry is the
 * fallback literal arb_get_helper_value() has for it. */
enum arb_helper_value
{
    ARB_ZERO,   /* "0.0" */
    ARB_ONE,    /* "1.0" */
    ARB_TWO,    /* "2.0" */
    ARB_0001,   /* "{0.0, 0.0, 0.0, 1.0}" */
    ARB_EPS,    /* "1e-8" */

    /* Only resolved for vertex shaders ("rel_addr_const.y"); there is no
     * pixel shader mapping and no fallback literal for it. */
    ARB_VS_REL_OFFSET
};
73 
74 static const char *arb_get_helper_value(enum wined3d_shader_type shader, enum arb_helper_value value)
75 {
76     if (shader != WINED3D_SHADER_TYPE_VERTEX && shader != WINED3D_SHADER_TYPE_PIXEL)
77     {
78         ERR("Unsupported shader type '%s'.\n", debug_shader_type(shader));
79         return "bad";
80     }
81 
82     if (shader == WINED3D_SHADER_TYPE_PIXEL)
83     {
84         switch (value)
85         {
86             case ARB_ZERO: return "ps_helper_const.x";
87             case ARB_ONE: return "ps_helper_const.y";
88             case ARB_TWO: return "coefmul.x";
89             case ARB_0001: return "ps_helper_const.xxxy";
90             case ARB_EPS: return "ps_helper_const.z";
91             default: break;
92         }
93     }
94     else
95     {
96         switch (value)
97         {
98             case ARB_ZERO: return "helper_const.x";
99             case ARB_ONE: return "helper_const.y";
100             case ARB_TWO: return "helper_const.z";
101             case ARB_EPS: return "helper_const.w";
102             case ARB_0001: return "helper_const.xxxy";
103             case ARB_VS_REL_OFFSET: return "rel_addr_const.y";
104         }
105     }
106     FIXME("Unmanaged %s shader helper constant requested: %u.\n",
107           shader == WINED3D_SHADER_TYPE_PIXEL ? "pixel" : "vertex", value);
108     switch (value)
109     {
110         case ARB_ZERO: return "0.0";
111         case ARB_ONE: return "1.0";
112         case ARB_TWO: return "2.0";
113         case ARB_0001: return "{0.0, 0.0, 0.0, 1.0}";
114         case ARB_EPS: return "1e-8";
115         default: return "bad";
116     }
117 }
118 
119 static inline BOOL ffp_clip_emul(const struct wined3d_context *context)
120 {
121     return context->lowest_disabled_stage < 7;
122 }
123 
124 /* ARB_program_shader private data */
125 
/* Describes one control-flow construct (if / ifc / loop / rep).
 * NOTE(review): presumably these are kept on shader_arb_ctx_priv.control_frames
 * while the construct is open - confirm against the users of this struct. */
struct control_frame
{
    /* List linkage. */
    struct                          list entry;
    /* Which kind of construct this frame stands for. */
    enum
    {
        IF,
        IFC,
        LOOP,
        REP
    } type;
    BOOL                            muting;
    BOOL                            outer_loop;
    /* Number of the construct; which member applies presumably depends on
     * 'type' - TODO confirm. */
    union
    {
        unsigned int                loop;
        unsigned int                ifc;
    } no;
    struct wined3d_shader_loop_control loop_control;
    /* NOTE(review): presumably set once an 'else' was seen for an IF/IFC
     * frame - confirm. */
    BOOL                            had_else;
};
146 
/* NP2 texcoord fixup description for an ARB pixel shader: the generic
 * ps_np2fixup_info plus the parameter index at which the fixup constants
 * start. */
struct arb_ps_np2fixup_info
{
    struct ps_np2fixup_info         super;
    /* For ARB we need an offset value:
     * With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) is stored
     * consecutively (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone"
     * array we need an offset to the index inside the program parameter array.
     * shader_arb_load_np2fixup_constants() uploads constant i at index offset + i.
     * NOTE(review): this comment used to say "program local parameter array", but the loader uploads
     * with glProgramEnvParameter4fvARB - confirm which parameter space is intended. */
    UINT                            offset;
};
156 
/* Compile arguments of an ARB pixel shader variant: struct ps_compile_args
 * (embedded as 'super') extended by ARB specific fields. */
struct arb_ps_compile_args
{
    struct ps_compile_args          super;
    /* NOTE(review): presumably a bitmask of boolean constant values - confirm. */
    WORD                            bools;
    WORD                            clip;  /* only a boolean, use a WORD for alignment */
    /* Three bytes per integer constant slot.
     * NOTE(review): presumably the loop control values taken from the
     * integer constants - confirm. */
    unsigned char                   loop_ctrl[WINED3D_MAX_CONSTS_I][3];
};
164 
/* Ties a texture unit to the program local parameter that receives one of
 * its texture stage state values (see shader_arb_ps_local_constants()). */
struct stb_const_desc
{
    /* Index into state->texture_states. */
    unsigned char           texunit;
    /* Program local parameter index; WINED3D_CONST_NUM_UNUSED marks an
     * unused luminance entry. */
    UINT                    const_num;
};
170 
/* One compiled ARB pixel shader variant together with the locations of the
 * program local parameters shader_arb_ps_local_constants() has to fill. */
struct arb_ps_compiled_shader
{
    /* Arguments this variant was compiled for. */
    struct arb_ps_compile_args      args;
    struct arb_ps_np2fixup_info     np2fixup_info;
    /* Bump environment matrix constants; the first numbumpenvmatconsts
     * entries are loaded. */
    struct stb_const_desc           bumpenvmatconst[MAX_TEXTURES];
    /* Luminance scale/offset constants, indexed in parallel with
     * bumpenvmatconst; const_num is WINED3D_CONST_NUM_UNUSED when absent. */
    struct stb_const_desc           luminanceconst[MAX_TEXTURES];
    /* Local parameter index per integer constant, or
     * WINED3D_CONST_NUM_UNUSED. */
    UINT                            int_consts[WINED3D_MAX_CONSTS_I];
    /* NOTE(review): presumably the GL program object name - confirm. */
    GLuint                          prgId;
    /* Local parameter index of the y correction constant, or
     * WINED3D_CONST_NUM_UNUSED. */
    UINT                            ycorrection;
    unsigned char                   numbumpenvmatconsts;
    /* When zero the integer constant upload is skipped entirely. */
    char                            num_int_consts;
};
183 
/* Compile arguments of an ARB vertex shader variant: struct vs_compile_args
 * (embedded as 'super') extended by ARB specific fields. */
struct arb_vs_compile_args
{
    struct vs_compile_args          super;
    /* The union overlays the three boolclip fields with a single DWORD.
     * NOTE(review): presumably so they can be compared in one go - confirm. */
    union
    {
        struct
        {
            WORD                    bools;
            unsigned char           clip_texcoord;
            unsigned char           clipplane_mask;
        }                           boolclip;
        DWORD                       boolclip_compare;
    } clip;
    /* NOTE(review): presumably the index of a struct ps_signature - confirm. */
    DWORD                           ps_signature;
    /* Same overlay trick for the four sampler bytes. */
    union
    {
        unsigned char               samplers[4];
        DWORD                       samplers_compare;
    } vertex;
    /* Three bytes per integer constant slot.
     * NOTE(review): presumably the loop control values taken from the
     * integer constants - confirm. */
    unsigned char                   loop_ctrl[WINED3D_MAX_CONSTS_I][3];
};
205 
/* One compiled ARB vertex shader variant together with the locations of the
 * program local parameters shader_arb_vs_local_constants() has to fill. */
struct arb_vs_compiled_shader
{
    /* Arguments this variant was compiled for. */
    struct arb_vs_compile_args      args;
    /* NOTE(review): presumably the GL program object name - confirm. */
    GLuint                          prgId;
    /* Local parameter index per integer constant, or
     * WINED3D_CONST_NUM_UNUSED. */
    UINT                            int_consts[WINED3D_MAX_CONSTS_I];
    /* When zero the integer constant upload is skipped entirely. */
    char                            num_int_consts;
    char                            need_color_unclamp;
    /* Local parameter index that receives the position fixup. */
    UINT                            pos_fixup;
};
215 
/* A shader instruction stored as a list element.
 * NOTE(review): presumably linked into shader_arb_ctx_priv.record while
 * 'recording' is set - confirm. */
struct recorded_instruction
{
    struct wined3d_shader_instruction ins;
    struct list entry;
};
221 
/* Per-compilation state of the ARB shader backend. Only target_version and
 * cur_vs_args are consulted in the visible part of this file
 * (shader_generate_arb_declarations()); the notes on the other members are
 * derived from their names and types. */
struct shader_arb_ctx_priv
{
    char addr_reg[20];
    /* Instruction set the generated program may use. */
    enum
    {
        /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */
        ARB,
        /* GL_NV_vertex_program2_option or GL_NV_fragment_program_option */
        NV2,
        /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */
        NV3
    } target_version;

    /* Compile arguments of the variant being generated. */
    const struct arb_vs_compile_args    *cur_vs_args;
    const struct arb_ps_compile_args    *cur_ps_args;
    const struct arb_ps_compiled_shader *compiled_fprog;
    const struct arb_vs_compiled_shader *compiled_vprog;
    struct arb_ps_np2fixup_info         *cur_np2fixup_info;
    /* NOTE(review): presumably a list of struct control_frame - confirm. */
    struct list                         control_frames;
    /* NOTE(review): presumably a list of struct recorded_instruction - confirm. */
    struct list                         record;
    BOOL                                recording;
    BOOL                                muted;
    unsigned int                        num_loops, loop_depth, num_ifcs;
    int                                 aL;
    BOOL                                ps_post_process;

    unsigned int                        vs_clipplanes;
    BOOL                                footer_written;
    BOOL                                in_main_func;

    /* For 3.0 vertex shaders */
    const char                          *vs_output[MAX_REG_OUTPUT];
    /* For 2.x and earlier vertex shaders */
    const char                          *texcrd_output[8], *color_output[2], *fog_output;

    /* 3.0 pshader input for compatibility with fixed function */
    const char                          *ps_input[MAX_REG_INPUT];
};
260 
/* A shader signature stored as a red-black tree node together with an index.
 * NOTE(review): presumably kept in shader_arb_priv.signature_tree, with 'idx'
 * being the number referred to by arb_pshader_private.input_signature_idx -
 * confirm. */
struct ps_signature
{
    struct wined3d_shader_signature sig;
    DWORD                               idx;
    struct wine_rb_entry                entry;
};
267 
/* ARB backend data for a pixel shader.
 * NOTE(review): presumably stored in wined3d_shader.backend_data, like the
 * vertex shader counterpart - confirm. */
struct arb_pshader_private {
    /* Array of compiled variants.
     * NOTE(review): presumably num_gl_shaders entries are in use out of
     * shader_array_size allocated - confirm. */
    struct arb_ps_compiled_shader   *gl_shaders;
    UINT                            num_gl_shaders, shader_array_size;
    DWORD                           input_signature_idx;
    DWORD                           clipplane_emulation;
    BOOL                            clamp_consts;
};
275 
/* ARB backend data for a vertex shader; shader_generate_arb_declarations()
 * obtains it from wined3d_shader.backend_data. */
struct arb_vshader_private {
    /* Array of compiled variants.
     * NOTE(review): presumably num_gl_shaders entries are in use out of
     * shader_array_size allocated - confirm. */
    struct arb_vs_compiled_shader   *gl_shaders;
    UINT                            num_gl_shaders, shader_array_size;
    /* A non-zero value makes need_rel_addr_const() return TRUE. */
    UINT rel_offset;
};
281 
/* Private data of the ARB shader backend; the constant update callbacks
 * obtain it from wined3d_device.shader_priv. */
struct shader_arb_priv
{
    GLuint                  current_vprogram_id;
    GLuint                  current_fprogram_id;
    /* Variants whose local constants shader_arb_load_constants_internal()
     * uploads. */
    const struct arb_ps_compiled_shader *compiled_fprog;
    const struct arb_vs_compiled_shader *compiled_vprog;
    BOOL                    use_arbfp_fixed_func;
    struct wine_rb_tree     fragment_shaders;
    BOOL                    last_ps_const_clamped;
    BOOL                    last_vs_color_unclamp;

    struct wine_rb_tree     signature_tree;
    DWORD ps_sig_number;

    /* Exclusive upper bound of the float constant indices that are examined
     * on the next upload. */
    unsigned int highest_dirty_ps_const, highest_dirty_vs_const;
    /* One dirty flag per float constant. */
    char vshader_const_dirty[WINED3D_MAX_VS_CONSTS_F];
    char pshader_const_dirty[WINED3D_MAX_PS_CONSTS_F];
    /* Context the constants were last loaded for; loading for a different
     * context marks every constant dirty first. */
    const struct wined3d_context *last_context;

    const struct wined3d_vertex_pipe_ops *vertex_pipe;
    const struct fragment_pipeline *fragment_pipe;
    BOOL ffp_proj_control;
};
305 
306 /* Context activation for state handlers is done by the caller. */
307 
308 static BOOL need_rel_addr_const(const struct arb_vshader_private *shader_data,
309         const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
310 {
311     if (shader_data->rel_offset) return TRUE;
312     if (!reg_maps->usesmova) return FALSE;
313     return !gl_info->supported[NV_VERTEX_PROGRAM2_OPTION];
314 }
315 
316 /* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */
317 static inline BOOL use_nv_clip(const struct wined3d_gl_info *gl_info)
318 {
319     return gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]
320             && !(gl_info->quirks & WINED3D_QUIRK_NV_CLIP_BROKEN);
321 }
322 
323 static BOOL need_helper_const(const struct arb_vshader_private *shader_data,
324         const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
325 {
326     if (need_rel_addr_const(shader_data, reg_maps, gl_info)) return TRUE;
327     if (!gl_info->supported[NV_VERTEX_PROGRAM]) return TRUE; /* Need to init colors. */
328     if (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) return TRUE; /* Load the immval offset. */
329     if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) return TRUE; /* Have to init texcoords. */
330     if (!use_nv_clip(gl_info)) return TRUE; /* Init the clip texcoord */
331     if (reg_maps->usesnrm) return TRUE; /* 0.0 */
332     if (reg_maps->usespow) return TRUE; /* EPS, 0.0 and 1.0 */
333     if (reg_maps->fog) return TRUE; /* Clamping fog coord, 0.0 and 1.0 */
334     return FALSE;
335 }
336 
/* Counts the vertex program PARAMs the backend keeps for itself: one for the
 * position fixup, plus one each for the helper constant and the relative
 * addressing constant when those are needed. */
static unsigned int reserved_vs_const(const struct arb_vshader_private *shader_data,
        const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
{
    /* The position fixup is always there. */
    unsigned int reserved = 1;

    reserved += need_helper_const(shader_data, reg_maps, gl_info) ? 1 : 0;
    reserved += need_rel_addr_const(shader_data, reg_maps, gl_info) ? 1 : 0;

    return reserved;
}
347 
348 /* Loads floating point constants into the currently set ARB_vertex/fragment_program.
349  * When constant_list == NULL, it will load all the constants.
350  *
351  * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders)
352  *  or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders)
353  */
354 /* Context activation is done by the caller. */
355 static unsigned int shader_arb_load_constants_f(const struct wined3d_shader *shader,
356         const struct wined3d_gl_info *gl_info, GLuint target_type, unsigned int max_constants,
357         const struct wined3d_vec4 *constants, char *dirty_consts)
358 {
359     struct wined3d_shader_lconst *lconst;
360     unsigned int ret, i, j;
361 
362     if (TRACE_ON(d3d_constants))
363     {
364         for (i = 0; i < max_constants; ++i)
365         {
366             if (!dirty_consts[i])
367                 continue;
368             TRACE_(d3d_constants)("Loading constant %u: %s.\n", i, debug_vec4(&constants[i]));
369         }
370     }
371 
372     i = 0;
373 
374     /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */
375     if (target_type == GL_FRAGMENT_PROGRAM_ARB && shader->reg_maps.shader_version.major == 1)
376     {
377         float lcl_const[4];
378         /* ps 1.x supports only 8 constants, clamp only those. When switching between 1.x and higher
379          * shaders, the first 8 constants are marked dirty for reload
380          */
381         for (; i < min(8, max_constants); ++i)
382         {
383             if (!dirty_consts[i])
384                 continue;
385             dirty_consts[i] = 0;
386 
387             if (constants[i].x > 1.0f)
388                 lcl_const[0] = 1.0f;
389             else if (constants[i].x < -1.0f)
390                 lcl_const[0] = -1.0f;
391             else
392                 lcl_const[0] = constants[i].x;
393 
394             if (constants[i].y > 1.0f)
395                 lcl_const[1] = 1.0f;
396             else if (constants[i].y < -1.0f)
397                 lcl_const[1] = -1.0f;
398             else
399                 lcl_const[1] = constants[i].y;
400 
401             if (constants[i].z > 1.0f)
402                 lcl_const[2] = 1.0f;
403             else if (constants[i].z < -1.0f)
404                 lcl_const[2] = -1.0f;
405             else
406                 lcl_const[2] = constants[i].z;
407 
408             if (constants[i].w > 1.0f)
409                 lcl_const[3] = 1.0f;
410             else if (constants[i].w < -1.0f)
411                 lcl_const[3] = -1.0f;
412             else
413                 lcl_const[3] = constants[i].w;
414 
415             GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const));
416         }
417 
418         /* If further constants are dirty, reload them without clamping.
419          *
420          * The alternative is not to touch them, but then we cannot reset the dirty constant count
421          * to zero. That's bad for apps that only use PS 1.x shaders, because in that case the code
422          * above would always re-check the first 8 constants since max_constant remains at the init
423          * value
424          */
425     }
426 
427     if (gl_info->supported[EXT_GPU_PROGRAM_PARAMETERS])
428     {
429         /* TODO: Benchmark if we're better of with finding the dirty constants ourselves,
430          * or just reloading *all* constants at once
431          *
432         GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4)));
433          */
434         for (; i < max_constants; ++i)
435         {
436             if (!dirty_consts[i])
437                 continue;
438 
439             /* Find the next block of dirty constants */
440             dirty_consts[i] = 0;
441             j = i;
442             for (++i; (i < max_constants) && dirty_consts[i]; ++i)
443             {
444                 dirty_consts[i] = 0;
445             }
446 
447             GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, &constants[j].x));
448         }
449     }
450     else
451     {
452         for (; i < max_constants; ++i)
453         {
454             if (dirty_consts[i])
455             {
456                 dirty_consts[i] = 0;
457                 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, &constants[i].x));
458             }
459         }
460     }
461     checkGLcall("glProgramEnvParameter4fvARB()");
462 
463     /* Load immediate constants */
464     if (shader->load_local_constsF)
465     {
466         if (TRACE_ON(d3d_shader))
467         {
468             LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
469             {
470                 GLfloat* values = (GLfloat*)lconst->value;
471                 TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx,
472                         values[0], values[1], values[2], values[3]);
473             }
474         }
475         /* Immediate constants are clamped for 1.X shaders at loading times */
476         ret = 0;
477         LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
478         {
479             dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */
480             ret = max(ret, lconst->idx + 1);
481             GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value));
482         }
483         checkGLcall("glProgramEnvParameter4fvARB()");
484         return ret; /* The loaded immediate constants need reloading for the next shader */
485     } else {
486         return 0; /* No constants are dirty now */
487     }
488 }
489 
490 /* Loads the texture dimensions for NP2 fixup into the currently set
491  * ARB_[vertex/fragment]_programs. */
492 static void shader_arb_load_np2fixup_constants(const struct arb_ps_np2fixup_info *fixup,
493         const struct wined3d_gl_info *gl_info, const struct wined3d_state *state)
494 {
495     GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
496     WORD active = fixup->super.active;
497     UINT i;
498 
499     if (!active)
500         return;
501 
502     for (i = 0; active; active >>= 1, ++i)
503     {
504         const struct wined3d_texture *tex = state->textures[i];
505         unsigned char idx = fixup->super.idx[i];
506         GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4];
507 
508         if (!(active & 1))
509             continue;
510 
511         if (!tex)
512         {
513             ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n");
514             continue;
515         }
516 
517         if (idx % 2)
518         {
519             tex_dim[2] = tex->pow2_matrix[0];
520             tex_dim[3] = tex->pow2_matrix[5];
521         }
522         else
523         {
524             tex_dim[0] = tex->pow2_matrix[0];
525             tex_dim[1] = tex->pow2_matrix[5];
526         }
527     }
528 
529     for (i = 0; i < fixup->super.num_consts; ++i)
530     {
531         GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
532                 fixup->offset + i, &np2fixup_constants[i * 4]));
533     }
534 }
535 
536 /* Context activation is done by the caller. */
537 static void shader_arb_ps_local_constants(const struct arb_ps_compiled_shader *gl_shader,
538         const struct wined3d_context *context, const struct wined3d_state *state, UINT rt_height)
539 {
540     const struct wined3d_gl_info *gl_info = context->gl_info;
541     unsigned char i;
542 
543     for(i = 0; i < gl_shader->numbumpenvmatconsts; i++)
544     {
545         int texunit = gl_shader->bumpenvmatconst[i].texunit;
546 
547         /* The state manager takes care that this function is always called if the bump env matrix changes */
548         const float *data = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_MAT00];
549         GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
550                 gl_shader->bumpenvmatconst[i].const_num, data));
551 
552         if (gl_shader->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED)
553         {
554             /* WINED3D_TSS_BUMPENVLSCALE and WINED3D_TSS_BUMPENVLOFFSET are next to each other.
555              * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we
556              * don't care about them. The pointers are valid for sure because the stateblock is bigger.
557              * (they're WINED3D_TSS_TEXTURETRANSFORMFLAGS and WINED3D_TSS_ADDRESSW, so most likely 0 or NaN
558             */
559             const float *scale = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_LSCALE];
560             GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
561                     gl_shader->luminanceconst[i].const_num, scale));
562         }
563     }
564     checkGLcall("Load bumpmap consts");
565 
566     if(gl_shader->ycorrection != WINED3D_CONST_NUM_UNUSED)
567     {
568         /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
569         * ycorrection.y: -1.0(onscreen), 1.0(offscreen)
570         * ycorrection.z: 1.0
571         * ycorrection.w: 0.0
572         */
573         float val[4];
574         val[0] = context->render_offscreen ? 0.0f : (float) rt_height;
575         val[1] = context->render_offscreen ? 1.0f : -1.0f;
576         val[2] = 1.0f;
577         val[3] = 0.0f;
578         GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->ycorrection, val));
579         checkGLcall("y correction loading");
580     }
581 
582     if (!gl_shader->num_int_consts) return;
583 
584     for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
585     {
586         if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED)
587         {
588             float val[4];
589             val[0] = (float)state->ps_consts_i[i].x;
590             val[1] = (float)state->ps_consts_i[i].y;
591             val[2] = (float)state->ps_consts_i[i].z;
592             val[3] = -1.0f;
593 
594             GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->int_consts[i], val));
595         }
596     }
597     checkGLcall("Load ps int consts");
598 }
599 
600 /* Context activation is done by the caller. */
601 static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *gl_shader,
602         const struct wined3d_context *context, const struct wined3d_state *state)
603 {
604     const struct wined3d_gl_info *gl_info = context->gl_info;
605     float position_fixup[4];
606     unsigned char i;
607 
608     /* Upload the position fixup */
609     shader_get_position_fixup(context, state, position_fixup);
610     GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, position_fixup));
611 
612     if (!gl_shader->num_int_consts) return;
613 
614     for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
615     {
616         if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED)
617         {
618             float val[4];
619             val[0] = (float)state->vs_consts_i[i].x;
620             val[1] = (float)state->vs_consts_i[i].y;
621             val[2] = (float)state->vs_consts_i[i].z;
622             val[3] = -1.0f;
623 
624             GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->int_consts[i], val));
625         }
626     }
627     checkGLcall("Load vs int consts");
628 }
629 
630 static void shader_arb_select(void *shader_priv, struct wined3d_context *context,
631         const struct wined3d_state *state);
632 
633 /**
634  * Loads the app-supplied constants into the currently set ARB_[vertex/fragment]_programs.
635  *
636  * We only support float constants in ARB at the moment, so don't
637  * worry about the Integers or Booleans
638  */
639 /* Context activation is done by the caller (state handler). */
640 static void shader_arb_load_constants_internal(struct shader_arb_priv *priv,
641         struct wined3d_context *context, const struct wined3d_state *state,
642         BOOL usePixelShader, BOOL useVertexShader, BOOL from_shader_select)
643 {
644     const struct wined3d_d3d_info *d3d_info = context->d3d_info;
645     const struct wined3d_gl_info *gl_info = context->gl_info;
646 
647     if (!from_shader_select)
648     {
649         const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX];
650         const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
651 
652         if (vshader
653                 && (vshader->reg_maps.boolean_constants
654                 || (!gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]
655                 && (vshader->reg_maps.integer_constants & ~vshader->reg_maps.local_int_consts))))
656         {
657             TRACE("bool/integer vertex shader constants potentially modified, forcing shader reselection.\n");
658             shader_arb_select(priv, context, state);
659         }
660         else if (pshader
661                 && (pshader->reg_maps.boolean_constants
662                 || (!gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]
663                 && (pshader->reg_maps.integer_constants & ~pshader->reg_maps.local_int_consts))))
664         {
665             TRACE("bool/integer pixel shader constants potentially modified, forcing shader reselection.\n");
666             shader_arb_select(priv, context, state);
667         }
668     }
669 
670     if (context != priv->last_context)
671     {
672         memset(priv->vshader_const_dirty, 1,
673                 sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count);
674         priv->highest_dirty_vs_const = d3d_info->limits.vs_uniform_count;
675 
676         memset(priv->pshader_const_dirty, 1,
677                 sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count);
678         priv->highest_dirty_ps_const = d3d_info->limits.ps_uniform_count;
679 
680         priv->last_context = context;
681     }
682 
683     if (useVertexShader)
684     {
685         const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX];
686         const struct arb_vs_compiled_shader *gl_shader = priv->compiled_vprog;
687 
688         /* Load DirectX 9 float constants for vertex shader */
689         priv->highest_dirty_vs_const = shader_arb_load_constants_f(vshader, gl_info, GL_VERTEX_PROGRAM_ARB,
690                 priv->highest_dirty_vs_const, state->vs_consts_f, priv->vshader_const_dirty);
691         shader_arb_vs_local_constants(gl_shader, context, state);
692     }
693 
694     if (usePixelShader)
695     {
696         const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
697         const struct arb_ps_compiled_shader *gl_shader = priv->compiled_fprog;
698         UINT rt_height = state->fb->render_targets[0]->height;
699 
700         /* Load DirectX 9 float constants for pixel shader */
701         priv->highest_dirty_ps_const = shader_arb_load_constants_f(pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB,
702                 priv->highest_dirty_ps_const, state->ps_consts_f, priv->pshader_const_dirty);
703         shader_arb_ps_local_constants(gl_shader, context, state, rt_height);
704 
705         if (context->constant_update_mask & WINED3D_SHADER_CONST_PS_NP2_FIXUP)
706             shader_arb_load_np2fixup_constants(&gl_shader->np2fixup_info, gl_info, state);
707     }
708 }
709 
710 static void shader_arb_load_constants(void *shader_priv, struct wined3d_context *context,
711         const struct wined3d_state *state)
712 {
713     BOOL vs = use_vs(state);
714     BOOL ps = use_ps(state);
715 
716     shader_arb_load_constants_internal(shader_priv, context, state, ps, vs, FALSE);
717 }
718 
719 static void shader_arb_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count)
720 {
721     struct wined3d_context *context = context_get_current();
722     struct shader_arb_priv *priv = device->shader_priv;
723 
724     /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
725      * context. On a context switch the old context will be fully dirtified */
726     if (!context || context->device != device)
727         return;
728 
729     memset(priv->vshader_const_dirty + start, 1, sizeof(*priv->vshader_const_dirty) * count);
730     priv->highest_dirty_vs_const = max(priv->highest_dirty_vs_const, start + count);
731 }
732 
733 static void shader_arb_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count)
734 {
735     struct wined3d_context *context = context_get_current();
736     struct shader_arb_priv *priv = device->shader_priv;
737 
738     /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
739      * context. On a context switch the old context will be fully dirtified */
740     if (!context || context->device != device)
741         return;
742 
743     memset(priv->pshader_const_dirty + start, 1, sizeof(*priv->pshader_const_dirty) * count);
744     priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, start + count);
745 }
746 
/* Appends an immediate vec4 of the form "{x, y, z, w}" to the buffer, each
 * of the four values converted to text with wined3d_ftoa(). */
static void shader_arb_append_imm_vec4(struct wined3d_string_buffer *buffer, const float *values)
{
    char text[4][17];
    unsigned int component;

    for (component = 0; component < sizeof(text) / sizeof(text[0]); ++component)
        wined3d_ftoa(values[component], text[component]);

    shader_addline(buffer, "{%s, %s, %s, %s}", text[0], text[1], text[2], text[3]);
}
757 
758 /* Generate the variable & register declarations for the ARB_vertex_program output target */
759 static void shader_generate_arb_declarations(const struct wined3d_shader *shader,
760         const struct wined3d_shader_reg_maps *reg_maps, struct wined3d_string_buffer *buffer,
761         const struct wined3d_gl_info *gl_info, DWORD *num_clipplanes,
762         const struct shader_arb_ctx_priv *ctx)
763 {
764     DWORD i;
765     char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
766     const struct wined3d_shader_lconst *lconst;
767     unsigned max_constantsF;
768     DWORD map;
769 
770     /* In pixel shaders, all private constants are program local, we don't need anything
771      * from program.env. Thus we can advertise the full set of constants in pixel shaders.
772      * If we need a private constant the GL implementation will squeeze it in somewhere
773      *
774      * With vertex shaders we need the posFixup and on some GL implementations 4 helper
775      * immediate values. The posFixup is loaded using program.env for now, so always
776      * subtract one from the number of constants. If the shader uses indirect addressing,
777      * account for the helper const too because we have to declare all available d3d constants
778      * and don't know which are actually used.
779      */
780     if (pshader)
781     {
782         max_constantsF = gl_info->limits.arb_ps_native_constants;
783         /* 24 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. */
784         if (max_constantsF < 24)
785             max_constantsF = gl_info->limits.arb_ps_float_constants;
786     }
787     else
788     {
789         const struct arb_vshader_private *shader_data = shader->backend_data;
790         max_constantsF = gl_info->limits.arb_vs_native_constants;
791         /* 96 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value.
792          * Also prevents max_constantsF from becoming less than 0 and
793          * wrapping . */
794         if (max_constantsF < 96)
795             max_constantsF = gl_info->limits.arb_vs_float_constants;
796 
797         if (reg_maps->usesrelconstF)
798         {
799             DWORD highest_constf = 0, clip_limit;
800 
801             max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info);
802             max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
803             max_constantsF -= gl_info->reserved_arb_constants;
804 
805             for (i = 0; i < shader->limits->constant_float; ++i)
806             {
807                 DWORD idx = i >> 5;
808                 DWORD shift = i & 0x1f;
809                 if (reg_maps->constf[idx] & (1u << shift))
810                     highest_constf = i;
811             }
812 
813             if(use_nv_clip(gl_info) && ctx->target_version >= NV2)
814             {
815                 if(ctx->cur_vs_args->super.clip_enabled)
816                     clip_limit = gl_info->limits.user_clip_distances;
817                 else
818                     clip_limit = 0;
819             }
820             else
821             {
822                 unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask;
823                 clip_limit = min(wined3d_popcount(mask), 4);
824             }
825             *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1);
826             max_constantsF -= *num_clipplanes;
827             if(*num_clipplanes < clip_limit)
828             {
829                 WARN("Only %u clip planes out of %u enabled.\n", *num_clipplanes,
830                         gl_info->limits.user_clip_distances);
831             }
832         }
833         else
834         {
835             if (ctx->target_version >= NV2)
836                 *num_clipplanes = gl_info->limits.user_clip_distances;
837             else
838                 *num_clipplanes = min(gl_info->limits.user_clip_distances, 4);
839         }
840     }
841 
842     for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
843     {
844         if (map & 1) shader_addline(buffer, "TEMP R%u;\n", i);
845     }
846 
847     for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
848     {
849         if (map & 1) shader_addline(buffer, "ADDRESS A%u;\n", i);
850     }
851 
852     if (pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3)
853     {
854         for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
855         {
856             if (map & 1) shader_addline(buffer, "TEMP T%u;\n", i);
857         }
858     }
859 
860     if (!shader->load_local_constsF)
861     {
862         LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
863         {
864             const float *value;
865             value = (const float *)lconst->value;
866             shader_addline(buffer, "PARAM C%u = ", lconst->idx);
867             shader_arb_append_imm_vec4(buffer, value);
868             shader_addline(buffer, ";\n");
869         }
870     }
871 
872     /* After subtracting privately used constants from the hardware limit(they are loaded as
873      * local constants), make sure the shader doesn't violate the env constant limit
874      */
875     if (pshader)
876     {
877         max_constantsF = min(max_constantsF, gl_info->limits.arb_ps_float_constants);
878     }
879     else
880     {
881         max_constantsF = min(max_constantsF, gl_info->limits.arb_vs_float_constants);
882     }
883 
884     /* Avoid declaring more constants than needed */
885     max_constantsF = min(max_constantsF, shader->limits->constant_float);
886 
887     /* we use the array-based constants array if the local constants are marked for loading,
888      * because then we use indirect addressing, or when the local constant list is empty,
889      * because then we don't know if we're using indirect addressing or not. If we're hardcoding
890      * local constants do not declare the loaded constants as an array because ARB compilers usually
891      * do not optimize unused constants away
892      */
893     if (reg_maps->usesrelconstF)
894     {
895         /* Need to PARAM the environment parameters (constants) so we can use relative addressing */
896         shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n",
897                     max_constantsF, max_constantsF - 1);
898     }
899     else
900     {
901         for (i = 0; i < max_constantsF; ++i)
902         {
903             if (!shader_constant_is_local(shader, i) && wined3d_extract_bits(reg_maps->constf, i, 1))
904             {
905                 shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i);
906             }
907         }
908     }
909 }
910 
/* Lookup table with one entry per shift value 0..15. Values 1-4 name a
 * component of "coefmul" (x2 .. x16), values 12-15 name a component of
 * "coefdiv" (d16 .. d2); every other slot holds the placeholder "dummy". */
static const char * const shift_tab[] =
{
    "dummy",        /* 0x0: no shift */
    "coefmul.x",    /* 0x1: multiply by 2 */
    "coefmul.y",    /* 0x2: multiply by 4 */
    "coefmul.z",    /* 0x3: multiply by 8 */
    "coefmul.w",    /* 0x4: multiply by 16 */
    "dummy",        /* 0x5: multiply by 32 */
    "dummy",        /* 0x6: multiply by 64 */
    "dummy",        /* 0x7: multiply by 128 */
    "dummy",        /* 0x8: divide by 256 */
    "dummy",        /* 0x9: divide by 128 */
    "dummy",        /* 0xa: divide by 64 */
    "dummy",        /* 0xb: divide by 32 */
    "coefdiv.w",    /* 0xc: divide by 16 */
    "coefdiv.z",    /* 0xd: divide by 8 */
    "coefdiv.y",    /* 0xe: divide by 4 */
    "coefdiv.x",    /* 0xf: divide by 2 */
};
929 
930 static void shader_arb_get_write_mask(const struct wined3d_shader_instruction *ins,
931         const struct wined3d_shader_dst_param *dst, char *write_mask)
932 {
933     char *ptr = write_mask;
934 
935     if (dst->write_mask != WINED3DSP_WRITEMASK_ALL)
936     {
937         *ptr++ = '.';
938         if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
939         if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
940         if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
941         if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
942     }
943 
944     *ptr = '\0';
945 }
946 
947 static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str)
948 {
949     /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
950      * but addressed as "rgba". To fix this we need to swap the register's x
951      * and z components. */
952     const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
953     char *ptr = swizzle_str;
954 
955     /* swizzle bits fields: wwzzyyxx */
956     DWORD swizzle = param->swizzle;
957     DWORD swizzle_x = swizzle & 0x03;
958     DWORD swizzle_y = (swizzle >> 2) & 0x03;
959     DWORD swizzle_z = (swizzle >> 4) & 0x03;
960     DWORD swizzle_w = (swizzle >> 6) & 0x03;
961 
962     /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to
963      * generate a swizzle string. Unless we need to our own swizzling. */
964     if (swizzle != WINED3DSP_NOSWIZZLE || fixup)
965     {
966         *ptr++ = '.';
967         if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) {
968             *ptr++ = swizzle_chars[swizzle_x];
969         } else {
970             *ptr++ = swizzle_chars[swizzle_x];
971             *ptr++ = swizzle_chars[swizzle_y];
972             *ptr++ = swizzle_chars[swizzle_z];
973             *ptr++ = swizzle_chars[swizzle_w];
974         }
975     }
976 
977     *ptr = '\0';
978 }
979 
980 static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src)
981 {
982     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
983     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
984 
985     if (!strcmp(priv->addr_reg, src)) return;
986 
987     strcpy(priv->addr_reg, src);
988     shader_addline(buffer, "ARL A0.x, %s;\n", src);
989 }
990 
/* Forward declaration. shader_arb_get_register_name() below calls this to
 * resolve relative addressing, while the definition further down calls
 * shader_arb_get_register_name() itself. */
static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins,
        const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr);
993 
994 static void shader_arb_get_register_name(const struct wined3d_shader_instruction *ins,
995         const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color)
996 {
997     /* oPos, oFog and oPts in D3D */
998     static const char * const rastout_reg_names[] = {"TMP_OUT", "TMP_FOGCOORD", "result.pointsize"};
999     const struct wined3d_shader *shader = ins->ctx->shader;
1000     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
1001     BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type);
1002     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1003 
1004     *is_color = FALSE;
1005 
1006     switch (reg->type)
1007     {
1008         case WINED3DSPR_TEMP:
1009             sprintf(register_name, "R%u", reg->idx[0].offset);
1010             break;
1011 
1012         case WINED3DSPR_INPUT:
1013             if (pshader)
1014             {
1015                 if (reg_maps->shader_version.major < 3)
1016                 {
1017                     if (!reg->idx[0].offset)
1018                         strcpy(register_name, "fragment.color.primary");
1019                     else
1020                         strcpy(register_name, "fragment.color.secondary");
1021                 }
1022                 else
1023                 {
1024                     if (reg->idx[0].rel_addr)
1025                     {
1026                         char rel_reg[50];
1027                         shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg);
1028 
1029                         if (!strcmp(rel_reg, "**aL_emul**"))
1030                         {
1031                             DWORD idx = ctx->aL + reg->idx[0].offset;
1032                             if(idx < MAX_REG_INPUT)
1033                             {
1034                                 strcpy(register_name, ctx->ps_input[idx]);
1035                             }
1036                             else
1037                             {
1038                                 ERR("Pixel shader input register out of bounds: %u\n", idx);
1039                                 sprintf(register_name, "out_of_bounds_%u", idx);
1040                             }
1041                         }
1042                         else if (reg_maps->input_registers & 0x0300)
1043                         {
1044                             /* There are two ways basically:
1045                              *
1046                              * 1) Use the unrolling code that is used for loop emulation and unroll the loop.
1047                              *    That means trouble if the loop also contains a breakc or if the control values
1048                              *    aren't local constants.
1049                              * 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the
1050                              *    source dynamically. The trouble is that we cannot simply read aL.y because it
1051                              *    is an ADDRESS register. We could however push it, load .zw with a value and use
1052                              *    ADAC to load the condition code register and pop it again afterwards
1053                              */
1054                             FIXME("Relative input register addressing with more than 8 registers\n");
1055 
1056                             /* This is better than nothing for now */
1057                             sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
1058                         }
1059                         else if(ctx->cur_ps_args->super.vp_mode != vertexshader)
1060                         {
1061                             /* This is problematic because we'd have to consult the ctx->ps_input strings
1062                              * for where to find the varying. Some may be "0.0", others can be texcoords or
1063                              * colors. This needs either a pipeline replacement to make the vertex shader feed
1064                              * proper varyings, or loop unrolling
1065                              *
1066                              * For now use the texcoords and hope for the best
1067                              */
1068                             FIXME("Non-vertex shader varying input with indirect addressing\n");
1069                             sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
1070                         }
1071                         else
1072                         {
1073                             /* D3D supports indirect addressing only with aL in loop registers. The loop instruction
1074                              * pulls GL_NV_fragment_program2 in
1075                              */
1076                             sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
1077                         }
1078                     }
1079                     else
1080                     {
1081                         if (reg->idx[0].offset < MAX_REG_INPUT)
1082                         {
1083                             strcpy(register_name, ctx->ps_input[reg->idx[0].offset]);
1084                         }
1085                         else
1086                         {
1087                             ERR("Pixel shader input register out of bounds: %u\n", reg->idx[0].offset);
1088                             sprintf(register_name, "out_of_bounds_%u", reg->idx[0].offset);
1089                         }
1090                     }
1091                 }
1092             }
1093             else
1094             {
1095                 if (ctx->cur_vs_args->super.swizzle_map & (1u << reg->idx[0].offset))
1096                     *is_color = TRUE;
1097                 sprintf(register_name, "vertex.attrib[%u]", reg->idx[0].offset);
1098             }
1099             break;
1100 
1101         case WINED3DSPR_CONST:
1102             if (!pshader && reg->idx[0].rel_addr)
1103             {
1104                 const struct arb_vshader_private *shader_data = shader->backend_data;
1105                 UINT rel_offset = ctx->target_version == ARB ? shader_data->rel_offset : 0;
1106                 BOOL aL = FALSE;
1107                 char rel_reg[50];
1108                 if (reg_maps->shader_version.major < 2)
1109                 {
1110                     sprintf(rel_reg, "A0.x");
1111                 }
1112                 else
1113                 {
1114                     shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg);
1115                     if (ctx->target_version == ARB)
1116                     {
1117                         if (!strcmp(rel_reg, "**aL_emul**"))
1118                         {
1119                             aL = TRUE;
1120                         } else {
1121                             shader_arb_request_a0(ins, rel_reg);
1122                             sprintf(rel_reg, "A0.x");
1123                         }
1124                     }
1125                 }
1126                 if (aL)
1127                     sprintf(register_name, "C[%u]", ctx->aL + reg->idx[0].offset);
1128                 else if (reg->idx[0].offset >= rel_offset)
1129                     sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx[0].offset - rel_offset);
1130                 else
1131                     sprintf(register_name, "C[%s - %u]", rel_reg, rel_offset - reg->idx[0].offset);
1132             }
1133             else
1134             {
1135                 if (reg_maps->usesrelconstF)
1136                     sprintf(register_name, "C[%u]", reg->idx[0].offset);
1137                 else
1138                     sprintf(register_name, "C%u", reg->idx[0].offset);
1139             }
1140             break;
1141 
1142         case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
1143             if (pshader)
1144             {
1145                 if (reg_maps->shader_version.major == 1
1146                         && reg_maps->shader_version.minor <= 3)
1147                     /* In ps <= 1.3, Tx is a temporary register as destination
1148                      * to all instructions, and as source to most instructions.
1149                      * For some instructions it is the texcoord input. Those
1150                      * instructions know about the special use. */
1151                     sprintf(register_name, "T%u", reg->idx[0].offset);
1152                 else
1153                     /* In ps 1.4 and 2.x Tx is always a (read-only) varying. */
1154                     sprintf(register_name, "fragment.texcoord[%u]", reg->idx[0].offset);
1155             }
1156             else
1157             {
1158                 if (reg_maps->shader_version.major == 1 || ctx->target_version >= NV2)
1159                     sprintf(register_name, "A%u", reg->idx[0].offset);
1160                 else
1161                     sprintf(register_name, "A%u_SHADOW", reg->idx[0].offset);
1162             }
1163             break;
1164 
1165         case WINED3DSPR_COLOROUT:
1166             if (ctx->ps_post_process && !reg->idx[0].offset)
1167             {
1168                 strcpy(register_name, "TMP_COLOR");
1169             }
1170             else
1171             {
1172                 if (ctx->cur_ps_args->super.srgb_correction)
1173                     FIXME("sRGB correction on higher render targets.\n");
1174                 if (reg_maps->rt_mask > 1)
1175                     sprintf(register_name, "result.color[%u]", reg->idx[0].offset);
1176                 else
1177                     strcpy(register_name, "result.color");
1178             }
1179             break;
1180 
1181         case WINED3DSPR_RASTOUT:
1182             if (reg->idx[0].offset == 1)
1183                 sprintf(register_name, "%s", ctx->fog_output);
1184             else
1185                 sprintf(register_name, "%s", rastout_reg_names[reg->idx[0].offset]);
1186             break;
1187 
1188         case WINED3DSPR_DEPTHOUT:
1189             strcpy(register_name, "result.depth");
1190             break;
1191 
1192         case WINED3DSPR_ATTROUT:
1193         /* case WINED3DSPR_OUTPUT: */
1194             if (pshader)
1195                 sprintf(register_name, "oD[%u]", reg->idx[0].offset);
1196             else
1197                 strcpy(register_name, ctx->color_output[reg->idx[0].offset]);
1198             break;
1199 
1200         case WINED3DSPR_TEXCRDOUT:
1201             if (pshader)
1202                 sprintf(register_name, "oT[%u]", reg->idx[0].offset);
1203             else if (reg_maps->shader_version.major < 3)
1204                 strcpy(register_name, ctx->texcrd_output[reg->idx[0].offset]);
1205             else
1206                 strcpy(register_name, ctx->vs_output[reg->idx[0].offset]);
1207             break;
1208 
1209         case WINED3DSPR_LOOP:
1210             if(ctx->target_version >= NV2)
1211             {
1212                 /* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */
1213                 if(pshader) sprintf(register_name, "A0.x");
1214                 else sprintf(register_name, "aL.y");
1215             }
1216             else
1217             {
1218                 /* Unfortunately this code cannot return the value of ctx->aL here. An immediate value
1219                  * would be valid, but if aL is used for indexing(its only use), there's likely an offset,
1220                  * thus the result would be something like C[15 + 30], which is not valid in the ARB program
1221                  * grammar. So return a marker for the emulated aL and intercept it in constant and varying
1222                  * indexing
1223                  */
1224                 sprintf(register_name, "**aL_emul**");
1225             }
1226 
1227             break;
1228 
1229         case WINED3DSPR_CONSTINT:
1230             sprintf(register_name, "I%u", reg->idx[0].offset);
1231             break;
1232 
1233         case WINED3DSPR_MISCTYPE:
1234             if (!reg->idx[0].offset)
1235                 sprintf(register_name, "vpos");
1236             else if (reg->idx[0].offset == 1)
1237                 sprintf(register_name, "fragment.facing.x");
1238             else
1239                 FIXME("Unknown MISCTYPE register index %u.\n", reg->idx[0].offset);
1240             break;
1241 
1242         default:
1243             FIXME("Unhandled register type %#x[%u].\n", reg->type, reg->idx[0].offset);
1244             sprintf(register_name, "unrecognized_register[%u]", reg->idx[0].offset);
1245             break;
1246     }
1247 }
1248 
1249 static void shader_arb_get_dst_param(const struct wined3d_shader_instruction *ins,
1250         const struct wined3d_shader_dst_param *wined3d_dst, char *str)
1251 {
1252     char register_name[255];
1253     char write_mask[6];
1254     BOOL is_color;
1255 
1256     shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color);
1257     strcpy(str, register_name);
1258 
1259     shader_arb_get_write_mask(ins, wined3d_dst, write_mask);
1260     strcat(str, write_mask);
1261 }
1262 
1263 static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source)
1264 {
1265     switch(channel_source)
1266     {
1267         case CHANNEL_SOURCE_ZERO: return "0";
1268         case CHANNEL_SOURCE_ONE: return "1";
1269         case CHANNEL_SOURCE_X: return "x";
1270         case CHANNEL_SOURCE_Y: return "y";
1271         case CHANNEL_SOURCE_Z: return "z";
1272         case CHANNEL_SOURCE_W: return "w";
1273         default:
1274             FIXME("Unhandled channel source %#x\n", channel_source);
1275             return "undefined";
1276     }
1277 }
1278 
/* Write mask bits describing which destination channels need a colour
 * fixup, as computed by calc_color_correction(). Both masks are limited to
 * the destination write mask passed to that function. */
struct color_fixup_masks
{
    /* Channels whose fixup source is not the channel itself. */
    DWORD source;
    /* Channels that have a sign fixup set. */
    DWORD sign;
};
1284 
1285 static struct color_fixup_masks calc_color_correction(struct color_fixup_desc fixup, DWORD dst_mask)
1286 {
1287     struct color_fixup_masks masks = {0, 0};
1288 
1289     if (is_complex_fixup(fixup))
1290     {
1291         enum complex_fixup complex_fixup = get_complex_fixup(fixup);
1292         FIXME("Complex fixup (%#x) not supported\n", complex_fixup);
1293         return masks;
1294     }
1295 
1296     if (fixup.x_source != CHANNEL_SOURCE_X)
1297         masks.source |= WINED3DSP_WRITEMASK_0;
1298     if (fixup.y_source != CHANNEL_SOURCE_Y)
1299         masks.source |= WINED3DSP_WRITEMASK_1;
1300     if (fixup.z_source != CHANNEL_SOURCE_Z)
1301         masks.source |= WINED3DSP_WRITEMASK_2;
1302     if (fixup.w_source != CHANNEL_SOURCE_W)
1303         masks.source |= WINED3DSP_WRITEMASK_3;
1304     masks.source &= dst_mask;
1305 
1306     if (fixup.x_sign_fixup)
1307         masks.sign |= WINED3DSP_WRITEMASK_0;
1308     if (fixup.y_sign_fixup)
1309         masks.sign |= WINED3DSP_WRITEMASK_1;
1310     if (fixup.z_sign_fixup)
1311         masks.sign |= WINED3DSP_WRITEMASK_2;
1312     if (fixup.w_sign_fixup)
1313         masks.sign |= WINED3DSP_WRITEMASK_3;
1314     masks.sign &= dst_mask;
1315 
1316     return masks;
1317 }
1318 
1319 static void gen_color_correction(struct wined3d_string_buffer *buffer, const char *dst,
1320         const char *src, const char *one, const char *two,
1321         struct color_fixup_desc fixup, struct color_fixup_masks masks)
1322 {
1323     const char *sign_fixup_src = dst;
1324 
1325     if (masks.source)
1326     {
1327         if (masks.sign)
1328             sign_fixup_src = "TA";
1329 
1330         shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", sign_fixup_src, src,
1331                 shader_arb_get_fixup_swizzle(fixup.x_source), shader_arb_get_fixup_swizzle(fixup.y_source),
1332                 shader_arb_get_fixup_swizzle(fixup.z_source), shader_arb_get_fixup_swizzle(fixup.w_source));
1333     }
1334     else if (masks.sign)
1335     {
1336         sign_fixup_src = src;
1337     }
1338 
1339     if (masks.sign)
1340     {
1341         char reg_mask[6];
1342         char *ptr = reg_mask;
1343 
1344         if (masks.sign != WINED3DSP_WRITEMASK_ALL)
1345         {
1346             *ptr++ = '.';
1347             if (masks.sign & WINED3DSP_WRITEMASK_0)
1348                 *ptr++ = 'x';
1349             if (masks.sign & WINED3DSP_WRITEMASK_1)
1350                 *ptr++ = 'y';
1351             if (masks.sign & WINED3DSP_WRITEMASK_2)
1352                 *ptr++ = 'z';
1353             if (masks.sign & WINED3DSP_WRITEMASK_3)
1354                 *ptr++ = 'w';
1355         }
1356         *ptr = '\0';
1357 
1358         shader_addline(buffer, "MAD %s%s, %s, %s, -%s;\n", dst, reg_mask, sign_fixup_src, two, one);
1359     }
1360 }
1361 
1362 static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins)
1363 {
1364     DWORD mod;
1365     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
1366     if (!ins->dst_count) return "";
1367 
1368     mod = ins->dst[0].modifiers;
1369 
1370     /* Silently ignore PARTIALPRECISION if it's not supported */
1371     if(priv->target_version == ARB) mod &= ~WINED3DSPDM_PARTIALPRECISION;
1372 
1373     if(mod & WINED3DSPDM_MSAMPCENTROID)
1374     {
1375         FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n");
1376         mod &= ~WINED3DSPDM_MSAMPCENTROID;
1377     }
1378 
1379     switch(mod)
1380     {
1381         case WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION:
1382             return "H_SAT";
1383 
1384         case WINED3DSPDM_SATURATE:
1385             return "_SAT";
1386 
1387         case WINED3DSPDM_PARTIALPRECISION:
1388             return "H";
1389 
1390         case 0:
1391             return "";
1392 
1393         default:
1394             FIXME("Unknown modifiers 0x%08x\n", mod);
1395             return "";
1396     }
1397 }
1398 
/* Flag bits for the "flags" argument of shader_hw_sample(). They select the
 * sampling instruction that is emitted; 0x08 is not assigned here. */
#define TEX_PROJ    0x01    /* projected sampling (TXP) */
#define TEX_BIAS    0x02    /* lod bias (TXB) */
#define TEX_LOD     0x04    /* explicit lod (TXL) */
#define TEX_DERIV   0x10    /* explicit derivatives (TXD) */
1403 
1404 static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx,
1405         const char *dst_str, const char *coord_reg, WORD flags, const char *dsx, const char *dsy)
1406 {
1407     enum wined3d_shader_resource_type resource_type = ins->ctx->reg_maps->resource_info[sampler_idx].type;
1408     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1409     const char *tex_type;
1410     BOOL np2_fixup = FALSE;
1411     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
1412     const char *mod;
1413     BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type);
1414     const struct wined3d_shader *shader;
1415     const struct wined3d_device *device;
1416     const struct wined3d_gl_info *gl_info;
1417     const char *tex_dst = dst_str;
1418     struct color_fixup_masks masks;
1419 
1420     /* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */
1421     if(!pshader) sampler_idx += MAX_FRAGMENT_SAMPLERS;
1422 
1423     switch (resource_type)
1424     {
1425         case WINED3D_SHADER_RESOURCE_TEXTURE_1D:
1426             tex_type = "1D";
1427             break;
1428 
1429         case WINED3D_SHADER_RESOURCE_TEXTURE_2D:
1430             shader = ins->ctx->shader;
1431             device = shader->device;
1432             gl_info = &device->adapter->gl_info;
1433 
1434             if (pshader && priv->cur_ps_args->super.np2_fixup & (1u << sampler_idx)
1435                     && gl_info->supported[ARB_TEXTURE_RECTANGLE])
1436                 tex_type = "RECT";
1437             else
1438                 tex_type = "2D";
1439             if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
1440             {
1441                 if (priv->cur_np2fixup_info->super.active & (1u << sampler_idx))
1442                 {
1443                     if (flags) FIXME("Only ordinary sampling from NP2 textures is supported.\n");
1444                     else np2_fixup = TRUE;
1445                 }
1446             }
1447             break;
1448 
1449         case WINED3D_SHADER_RESOURCE_TEXTURE_3D:
1450             tex_type = "3D";
1451             break;
1452 
1453         case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE:
1454             tex_type = "CUBE";
1455             break;
1456 
1457         default:
1458             ERR("Unexpected resource type %#x.\n", resource_type);
1459             tex_type = "";
1460     }
1461 
1462     /* TEX, TXL, TXD and TXP do not support the "H" modifier,
1463      * so don't use shader_arb_get_modifier
1464      */
1465     if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) mod = "_SAT";
1466     else mod = "";
1467 
1468     /* Fragment samplers always have indentity mapping */
1469     if(sampler_idx >= MAX_FRAGMENT_SAMPLERS)
1470     {
1471         sampler_idx = priv->cur_vs_args->vertex.samplers[sampler_idx - MAX_FRAGMENT_SAMPLERS];
1472     }
1473 
1474     if (pshader)
1475     {
1476         masks = calc_color_correction(priv->cur_ps_args->super.color_fixup[sampler_idx],
1477                 ins->dst[0].write_mask);
1478 
1479         if (masks.source || masks.sign)
1480             tex_dst = "TA";
1481     }
1482 
1483     if (flags & TEX_DERIV)
1484     {
1485         if(flags & TEX_PROJ) FIXME("Projected texture sampling with custom derivatives\n");
1486         if(flags & TEX_BIAS) FIXME("Biased texture sampling with custom derivatives\n");
1487         shader_addline(buffer, "TXD%s %s, %s, %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg,
1488                        dsx, dsy, sampler_idx, tex_type);
1489     }
1490     else if(flags & TEX_LOD)
1491     {
1492         if(flags & TEX_PROJ) FIXME("Projected texture sampling with explicit lod\n");
1493         if(flags & TEX_BIAS) FIXME("Biased texture sampling with explicit lod\n");
1494         shader_addline(buffer, "TXL%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg,
1495                        sampler_idx, tex_type);
1496     }
1497     else if (flags & TEX_BIAS)
1498     {
1499         /* Shouldn't be possible, but let's check for it */
1500         if(flags & TEX_PROJ) FIXME("Biased and Projected texture sampling\n");
1501         /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */
1502         shader_addline(buffer, "TXB%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type);
1503     }
1504     else if (flags & TEX_PROJ)
1505     {
1506         shader_addline(buffer, "TXP%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type);
1507     }
1508     else
1509     {
1510         if (np2_fixup)
1511         {
1512             const unsigned char idx = priv->cur_np2fixup_info->super.idx[sampler_idx];
1513             shader_addline(buffer, "MUL TA, np2fixup[%u].%s, %s;\n", idx >> 1,
1514                            (idx % 2) ? "zwxy" : "xyzw", coord_reg);
1515 
1516             shader_addline(buffer, "TEX%s %s, TA, texture[%u], %s;\n", mod, tex_dst, sampler_idx, tex_type);
1517         }
1518         else
1519             shader_addline(buffer, "TEX%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type);
1520     }
1521 
1522     if (pshader)
1523     {
1524         gen_color_correction(buffer, dst_str, tex_dst,
1525                 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_ONE),
1526                 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_TWO),
1527                 priv->cur_ps_args->super.color_fixup[sampler_idx], masks);
1528     }
1529 }
1530 
1531 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins,
1532         const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr)
1533 {
1534     /* Generate a line that does the input modifier computation and return the input register to use */
1535     BOOL is_color = FALSE, insert_line;
1536     char regstr[256];
1537     char swzstr[20];
1538     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1539     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1540     const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
1541     const char *two = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_TWO);
1542 
1543     /* Assume a new line will be added */
1544     insert_line = TRUE;
1545 
1546     /* Get register name */
1547     shader_arb_get_register_name(ins, &src->reg, regstr, &is_color);
1548     shader_arb_get_swizzle(src, is_color, swzstr);
1549 
1550     switch (src->modifiers)
1551     {
1552     case WINED3DSPSM_NONE:
1553         sprintf(outregstr, "%s%s", regstr, swzstr);
1554         insert_line = FALSE;
1555         break;
1556     case WINED3DSPSM_NEG:
1557         sprintf(outregstr, "-%s%s", regstr, swzstr);
1558         insert_line = FALSE;
1559         break;
1560     case WINED3DSPSM_BIAS:
1561         shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr);
1562         break;
1563     case WINED3DSPSM_BIASNEG:
1564         shader_addline(buffer, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg, regstr);
1565         break;
1566     case WINED3DSPSM_SIGN:
1567         shader_addline(buffer, "MAD T%c, %s, %s, -%s;\n", 'A' + tmpreg, regstr, two, one);
1568         break;
1569     case WINED3DSPSM_SIGNNEG:
1570         shader_addline(buffer, "MAD T%c, %s, -%s, %s;\n", 'A' + tmpreg, regstr, two, one);
1571         break;
1572     case WINED3DSPSM_COMP:
1573         shader_addline(buffer, "SUB T%c, %s, %s;\n", 'A' + tmpreg, one, regstr);
1574         break;
1575     case WINED3DSPSM_X2:
1576         shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr);
1577         break;
1578     case WINED3DSPSM_X2NEG:
1579         shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr);
1580         break;
1581     case WINED3DSPSM_DZ:
1582         shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr);
1583         shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
1584         break;
1585     case WINED3DSPSM_DW:
1586         shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr);
1587         shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
1588         break;
1589     case WINED3DSPSM_ABS:
1590         if(ctx->target_version >= NV2) {
1591             sprintf(outregstr, "|%s%s|", regstr, swzstr);
1592             insert_line = FALSE;
1593         } else {
1594             shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
1595         }
1596         break;
1597     case WINED3DSPSM_ABSNEG:
1598         if(ctx->target_version >= NV2) {
1599             sprintf(outregstr, "-|%s%s|", regstr, swzstr);
1600         } else {
1601             shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
1602             sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr);
1603         }
1604         insert_line = FALSE;
1605         break;
1606     default:
1607         sprintf(outregstr, "%s%s", regstr, swzstr);
1608         insert_line = FALSE;
1609     }
1610 
1611     /* Return modified or original register, with swizzle */
1612     if (insert_line)
1613         sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr);
1614 }
1615 
1616 static void pshader_hw_bem(const struct wined3d_shader_instruction *ins)
1617 {
1618     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1619     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1620     DWORD sampler_code = dst->reg.idx[0].offset;
1621     char dst_name[50];
1622     char src_name[2][50];
1623 
1624     shader_arb_get_dst_param(ins, dst, dst_name);
1625 
1626     /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
1627      *
1628      * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid
1629      * with bem. So delay loading the first parameter until after the perturbation calculation which needs two
1630      * temps is done.
1631      */
1632     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1633     shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code);
1634     shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]);
1635     shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code);
1636     shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]);
1637 
1638     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
1639     shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]);
1640 }
1641 
1642 static DWORD negate_modifiers(DWORD mod, char *extra_char)
1643 {
1644     *extra_char = ' ';
1645     switch(mod)
1646     {
1647         case WINED3DSPSM_NONE:      return WINED3DSPSM_NEG;
1648         case WINED3DSPSM_NEG:       return WINED3DSPSM_NONE;
1649         case WINED3DSPSM_BIAS:      return WINED3DSPSM_BIASNEG;
1650         case WINED3DSPSM_BIASNEG:   return WINED3DSPSM_BIAS;
1651         case WINED3DSPSM_SIGN:      return WINED3DSPSM_SIGNNEG;
1652         case WINED3DSPSM_SIGNNEG:   return WINED3DSPSM_SIGN;
1653         case WINED3DSPSM_COMP:      *extra_char = '-'; return WINED3DSPSM_COMP;
1654         case WINED3DSPSM_X2:        return WINED3DSPSM_X2NEG;
1655         case WINED3DSPSM_X2NEG:     return WINED3DSPSM_X2;
1656         case WINED3DSPSM_DZ:        *extra_char = '-'; return WINED3DSPSM_DZ;
1657         case WINED3DSPSM_DW:        *extra_char = '-'; return WINED3DSPSM_DW;
1658         case WINED3DSPSM_ABS:       return WINED3DSPSM_ABSNEG;
1659         case WINED3DSPSM_ABSNEG:    return WINED3DSPSM_ABS;
1660     }
1661     FIXME("Unknown modifier %u\n", mod);
1662     return mod;
1663 }
1664 
1665 static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins)
1666 {
1667     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1668     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1669     char dst_name[50];
1670     char src_name[3][50];
1671     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
1672             ins->ctx->reg_maps->shader_version.minor);
1673 
1674     shader_arb_get_dst_param(ins, dst, dst_name);
1675     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1676 
1677     if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue
1678             && ins->dst->write_mask != WINED3DSP_WRITEMASK_3)
1679     {
1680         shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]);
1681     }
1682     else
1683     {
1684         struct wined3d_shader_src_param src0_copy = ins->src[0];
1685         char extra_neg;
1686 
1687         /* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */
1688         src0_copy.modifiers = negate_modifiers(src0_copy.modifiers, &extra_neg);
1689 
1690         shader_arb_get_src_param(ins, &src0_copy, 0, src_name[0]);
1691         shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1692         shader_addline(buffer, "ADD TA, %c%s, coefdiv.x;\n", extra_neg, src_name[0]);
1693         shader_addline(buffer, "CMP%s %s, TA, %s, %s;\n", shader_arb_get_modifier(ins),
1694                 dst_name, src_name[1], src_name[2]);
1695     }
1696 }
1697 
1698 static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins)
1699 {
1700     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1701     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1702     char dst_name[50];
1703     char src_name[3][50];
1704 
1705     shader_arb_get_dst_param(ins, dst, dst_name);
1706 
1707     /* Generate input register names (with modifiers) */
1708     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
1709     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1710     shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1711 
1712     shader_addline(buffer, "CMP%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
1713             dst_name, src_name[0], src_name[2], src_name[1]);
1714 }
1715 
1716 /** Process the WINED3DSIO_DP2ADD instruction in ARB.
1717  * dst = dot2(src0, src1) + src2 */
1718 static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins)
1719 {
1720     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1721     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1722     char dst_name[50];
1723     char src_name[3][50];
1724     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1725 
1726     shader_arb_get_dst_param(ins, dst, dst_name);
1727     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
1728     shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1729 
1730     if(ctx->target_version >= NV3)
1731     {
1732         /* GL_NV_fragment_program2 has a 1:1 matching instruction */
1733         shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1734         shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
1735                        dst_name, src_name[0], src_name[1], src_name[2]);
1736     }
1737     else if(ctx->target_version >= NV2)
1738     {
1739         /* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y
1740          * dst.y = src2.?, src0.x, src1.z + src0.y * src1.w
1741          * dst.z = src2.?, src0.x, src1.x + src0.y * src1.y
1742          * dst.z = src2.?, src0.x, src1.z + src0.y * src1.w
1743          *
1744          * Make sure that src1.zw = src1.xy, then we get a classic dp2add
1745          *
1746          * .xyxy and other swizzles that we could get with this are not valid in
1747          * plain ARBfp, but luckily the NV extension grammar lifts this limitation.
1748          */
1749         struct wined3d_shader_src_param tmp_param = ins->src[1];
1750         DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */
1751         tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */
1752 
1753         shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]);
1754 
1755         shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
1756                        dst_name, src_name[2], src_name[0], src_name[1]);
1757     }
1758     else
1759     {
1760         shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1761         /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2]
1762         * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite
1763         */
1764         shader_addline(buffer, "MOV TA, %s;\n", src_name[0]);
1765         shader_addline(buffer, "MOV TA.z, 0.0;\n");
1766         shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]);
1767         shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]);
1768     }
1769 }
1770 
1771 /* Map the opcode 1-to-1 to the GL code */
1772 static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins)
1773 {
1774     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1775     const char *instruction;
1776     char arguments[256], dst_str[50];
1777     unsigned int i;
1778     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1779 
1780     switch (ins->handler_idx)
1781     {
1782         case WINED3DSIH_ABS: instruction = "ABS"; break;
1783         case WINED3DSIH_ADD: instruction = "ADD"; break;
1784         case WINED3DSIH_CRS: instruction = "XPD"; break;
1785         case WINED3DSIH_DP3: instruction = "DP3"; break;
1786         case WINED3DSIH_DP4: instruction = "DP4"; break;
1787         case WINED3DSIH_DST: instruction = "DST"; break;
1788         case WINED3DSIH_FRC: instruction = "FRC"; break;
1789         case WINED3DSIH_LIT: instruction = "LIT"; break;
1790         case WINED3DSIH_LRP: instruction = "LRP"; break;
1791         case WINED3DSIH_MAD: instruction = "MAD"; break;
1792         case WINED3DSIH_MAX: instruction = "MAX"; break;
1793         case WINED3DSIH_MIN: instruction = "MIN"; break;
1794         case WINED3DSIH_MOV: instruction = "MOV"; break;
1795         case WINED3DSIH_MUL: instruction = "MUL"; break;
1796         case WINED3DSIH_SGE: instruction = "SGE"; break;
1797         case WINED3DSIH_SLT: instruction = "SLT"; break;
1798         case WINED3DSIH_SUB: instruction = "SUB"; break;
1799         case WINED3DSIH_MOVA:instruction = "ARR"; break;
1800         case WINED3DSIH_DSX: instruction = "DDX"; break;
1801         default: instruction = "";
1802             FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
1803             break;
1804     }
1805 
1806     /* Note that shader_arb_add_dst_param() adds spaces. */
1807     arguments[0] = '\0';
1808     shader_arb_get_dst_param(ins, dst, dst_str);
1809     for (i = 0; i < ins->src_count; ++i)
1810     {
1811         char operand[100];
1812         strcat(arguments, ", ");
1813         shader_arb_get_src_param(ins, &ins->src[i], i, operand);
1814         strcat(arguments, operand);
1815     }
1816     shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments);
1817 }
1818 
/* Instruction handler that emits nothing; the body is intentionally empty. */
static void shader_hw_nop(const struct wined3d_shader_instruction *ins)
{
}
1820 
1821 static DWORD shader_arb_select_component(DWORD swizzle, DWORD component)
1822 {
1823     return ((swizzle >> 2 * component) & 0x3) * 0x55;
1824 }
1825 
1826 static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
1827 {
1828     const struct wined3d_shader *shader = ins->ctx->shader;
1829     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
1830     BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type);
1831     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1832     const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO);
1833     const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE);
1834     const char *two = arb_get_helper_value(reg_maps->shader_version.type, ARB_TWO);
1835 
1836     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1837     char src0_param[256];
1838 
1839     if (ins->handler_idx == WINED3DSIH_MOVA)
1840     {
1841         const struct arb_vshader_private *shader_data = shader->backend_data;
1842         char write_mask[6];
1843         const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET);
1844 
1845         if(ctx->target_version >= NV2) {
1846             shader_hw_map2gl(ins);
1847             return;
1848         }
1849         shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param);
1850         shader_arb_get_write_mask(ins, &ins->dst[0], write_mask);
1851 
1852         /* This implements the mova formula used in GLSL. The first two instructions
1853          * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0
1854          * in this case:
1855          * mova A0.x, 0.0
1856          *
1857          * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor
1858          *
1859          * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into
1860          * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign.
1861          */
1862         shader_addline(buffer, "SGE A0_SHADOW%s, %s, %s;\n", write_mask, src0_param, zero);
1863         shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, %s, -%s;\n", write_mask, two, one);
1864 
1865         shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param);
1866         shader_addline(buffer, "ADD TA%s, TA, rel_addr_const.x;\n", write_mask);
1867         shader_addline(buffer, "FLR TA%s, TA;\n", write_mask);
1868         if (shader_data->rel_offset)
1869         {
1870             shader_addline(buffer, "ADD TA%s, TA, %s;\n", write_mask, offset);
1871         }
1872         shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask);
1873 
1874         ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0';
1875     }
1876     else if (reg_maps->shader_version.major == 1
1877           && !shader_is_pshader_version(reg_maps->shader_version.type)
1878           && ins->dst[0].reg.type == WINED3DSPR_ADDR)
1879     {
1880         const struct arb_vshader_private *shader_data = shader->backend_data;
1881         src0_param[0] = '\0';
1882 
1883         if (shader_data->rel_offset && ctx->target_version == ARB)
1884         {
1885             const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET);
1886             shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param);
1887             shader_addline(buffer, "ADD TA.x, %s, %s;\n", src0_param, offset);
1888             shader_addline(buffer, "ARL A0.x, TA.x;\n");
1889         }
1890         else
1891         {
1892             /* Apple's ARB_vertex_program implementation does not accept an ARL source argument
1893              * with more than one component. Thus replicate the first source argument over all
1894              * 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */
1895             struct wined3d_shader_src_param tmp_src = ins->src[0];
1896             tmp_src.swizzle = shader_arb_select_component(tmp_src.swizzle, 0);
1897             shader_arb_get_src_param(ins, &tmp_src, 0, src0_param);
1898             shader_addline(buffer, "ARL A0.x, %s;\n", src0_param);
1899         }
1900     }
1901     else if (ins->dst[0].reg.type == WINED3DSPR_COLOROUT && !ins->dst[0].reg.idx[0].offset && pshader)
1902     {
1903         if (ctx->ps_post_process && shader->u.ps.color0_mov)
1904         {
1905             shader_addline(buffer, "#mov handled in srgb write or fog code\n");
1906             return;
1907         }
1908         shader_hw_map2gl(ins);
1909     }
1910     else
1911     {
1912         shader_hw_map2gl(ins);
1913     }
1914 }
1915 
1916 static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins)
1917 {
1918     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1919     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1920     char reg_dest[40];
1921 
1922     /* No swizzles are allowed in d3d's texkill. PS 1.x ignores the 4th component as documented,
1923      * but >= 2.0 honors it (undocumented, but tested by the d3d9 testsuite)
1924      */
1925     shader_arb_get_dst_param(ins, dst, reg_dest);
1926 
1927     if (ins->ctx->reg_maps->shader_version.major >= 2)
1928     {
1929         const char *kilsrc = "TA";
1930         BOOL is_color;
1931 
1932         shader_arb_get_register_name(ins, &dst->reg, reg_dest, &is_color);
1933         if(dst->write_mask == WINED3DSP_WRITEMASK_ALL)
1934         {
1935             kilsrc = reg_dest;
1936         }
1937         else
1938         {
1939             /* Sigh. KIL doesn't support swizzles/writemasks. KIL passes a writemask, but ".xy" for example
1940              * is not valid as a swizzle in ARB (needs ".xyyy"). Use SWZ to load the register properly, and set
1941              * masked out components to 0(won't kill)
1942              */
1943             char x = '0', y = '0', z = '0', w = '0';
1944             if(dst->write_mask & WINED3DSP_WRITEMASK_0) x = 'x';
1945             if(dst->write_mask & WINED3DSP_WRITEMASK_1) y = 'y';
1946             if(dst->write_mask & WINED3DSP_WRITEMASK_2) z = 'z';
1947             if(dst->write_mask & WINED3DSP_WRITEMASK_3) w = 'w';
1948             shader_addline(buffer, "SWZ TA, %s, %c, %c, %c, %c;\n", reg_dest, x, y, z, w);
1949         }
1950         shader_addline(buffer, "KIL %s;\n", kilsrc);
1951     }
1952     else
1953     {
1954         /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component,
1955          * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL
1956          *
1957          * ps_1_3 shaders use the texcoord incarnation of the Tx register. ps_1_4 shaders can use the same,
1958          * or pass in any temporary register(in shader phase 2)
1959          */
1960         if (ins->ctx->reg_maps->shader_version.minor <= 3)
1961             sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx[0].offset);
1962         else
1963             shader_arb_get_dst_param(ins, dst, reg_dest);
1964         shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest);
1965         shader_addline(buffer, "KIL TA;\n");
1966     }
1967 }
1968 
1969 static void pshader_hw_tex(const struct wined3d_shader_instruction *ins)
1970 {
1971     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
1972     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1973     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
1974             ins->ctx->reg_maps->shader_version.minor);
1975     struct wined3d_shader_src_param src;
1976 
1977     char reg_dest[40];
1978     char reg_coord[40];
1979     DWORD reg_sampler_code;
1980     WORD myflags = 0;
1981     BOOL swizzle_coord = FALSE;
1982 
1983     /* All versions have a destination register */
1984     shader_arb_get_dst_param(ins, dst, reg_dest);
1985 
1986     /* 1.0-1.4: Use destination register number as texture code.
1987        2.0+: Use provided sampler number as texture code. */
1988     if (shader_version < WINED3D_SHADER_VERSION(2,0))
1989         reg_sampler_code = dst->reg.idx[0].offset;
1990     else
1991         reg_sampler_code = ins->src[1].reg.idx[0].offset;
1992 
1993     /* 1.0-1.3: Use the texcoord varying.
1994        1.4+: Use provided coordinate source register. */
1995     if (shader_version < WINED3D_SHADER_VERSION(1,4))
1996         sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code);
1997     else {
1998         /* TEX is the only instruction that can handle DW and DZ natively */
1999         src = ins->src[0];
2000         if(src.modifiers == WINED3DSPSM_DW) src.modifiers = WINED3DSPSM_NONE;
2001         if(src.modifiers == WINED3DSPSM_DZ) src.modifiers = WINED3DSPSM_NONE;
2002         shader_arb_get_src_param(ins, &src, 0, reg_coord);
2003     }
2004 
2005     /* projection flag:
2006      * 1.1, 1.2, 1.3: Use WINED3D_TSS_TEXTURETRANSFORMFLAGS
2007      * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0]
2008      * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode
2009      */
2010     if (shader_version < WINED3D_SHADER_VERSION(1,4))
2011     {
2012         DWORD flags = 0;
2013         if (reg_sampler_code < MAX_TEXTURES)
2014             flags = priv->cur_ps_args->super.tex_transform >> reg_sampler_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
2015         if (flags & WINED3D_PSARGS_PROJECTED)
2016         {
2017             myflags |= TEX_PROJ;
2018             if ((flags & ~WINED3D_PSARGS_PROJECTED) == WINED3D_TTFF_COUNT3)
2019                 swizzle_coord = TRUE;
2020         }
2021     }
2022     else if (shader_version < WINED3D_SHADER_VERSION(2,0))
2023     {
2024         enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers;
2025         if (src_mod == WINED3DSPSM_DZ)
2026         {
2027             swizzle_coord = TRUE;
2028             myflags |= TEX_PROJ;
2029         } else if(src_mod == WINED3DSPSM_DW) {
2030             myflags |= TEX_PROJ;
2031         }
2032     } else {
2033         if (ins->flags & WINED3DSI_TEXLD_PROJECT) myflags |= TEX_PROJ;
2034         if (ins->flags & WINED3DSI_TEXLD_BIAS) myflags |= TEX_BIAS;
2035     }
2036 
2037     if (swizzle_coord)
2038     {
2039         /* TXP cannot handle DZ natively, so move the z coordinate to .w.
2040          * reg_coord is a read-only varying register, so we need a temp reg */
2041         shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord);
2042         strcpy(reg_coord, "TA");
2043     }
2044 
2045     shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, myflags, NULL, NULL);
2046 }
2047 
2048 static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins)
2049 {
2050     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2051     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2052     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
2053             ins->ctx->reg_maps->shader_version.minor);
2054     char dst_str[50];
2055 
2056     if (shader_version < WINED3D_SHADER_VERSION(1,4))
2057     {
2058         DWORD reg = dst->reg.idx[0].offset;
2059 
2060         shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2061         shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg);
2062     } else {
2063         char reg_src[40];
2064 
2065         shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src);
2066         shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2067         shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src);
2068     }
2069 }
2070 
2071 static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins)
2072 {
2073      struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2074      DWORD flags = 0;
2075 
2076      DWORD reg1 = ins->dst[0].reg.idx[0].offset;
2077      char dst_str[50];
2078      char src_str[50];
2079 
2080      /* Note that texreg2ar treats Tx as a temporary register, not as a varying */
2081      shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2082      shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
2083      /* Move .x first in case src_str is "TA" */
2084      shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str);
2085      shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str);
2086      if (reg1 < MAX_TEXTURES)
2087      {
2088          struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2089          flags = priv->cur_ps_args->super.tex_transform >> reg1 * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
2090      }
2091      shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2092 }
2093 
2094 static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins)
2095 {
2096      struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2097 
2098      DWORD reg1 = ins->dst[0].reg.idx[0].offset;
2099      char dst_str[50];
2100      char src_str[50];
2101 
2102      /* Note that texreg2gb treats Tx as a temporary register, not as a varying */
2103      shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2104      shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
2105      shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str);
2106      shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str);
2107      shader_hw_sample(ins, reg1, dst_str, "TA", 0, NULL, NULL);
2108 }
2109 
2110 static void pshader_hw_texreg2rgb(const struct wined3d_shader_instruction *ins)
2111 {
2112     DWORD reg1 = ins->dst[0].reg.idx[0].offset;
2113     char dst_str[50];
2114     char src_str[50];
2115 
2116     /* Note that texreg2rg treats Tx as a temporary register, not as a varying */
2117     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2118     shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
2119     shader_hw_sample(ins, reg1, dst_str, src_str, 0, NULL, NULL);
2120 }
2121 
2122 static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins)
2123 {
2124     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2125     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2126     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2127     char reg_coord[40], dst_reg[50], src_reg[50];
2128     DWORD reg_dest_code;
2129 
2130     /* All versions have a destination register. The Tx where the texture coordinates come
2131      * from is the varying incarnation of the texture register
2132      */
2133     reg_dest_code = dst->reg.idx[0].offset;
2134     shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg);
2135     shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg);
2136     sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code);
2137 
2138     /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
2139      * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register
2140      *
2141      * GL_NV_fragment_program_option could handle this in one instruction via X2D:
2142      * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw
2143      *
2144      * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that
2145      * comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV
2146      * extension.
2147      */
2148     shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code);
2149     shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg);
2150     shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code);
2151     shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg);
2152 
2153     /* with projective textures, texbem only divides the static texture coord, not the displacement,
2154      * so we can't let the GL handle this.
2155      */
2156     if ((priv->cur_ps_args->super.tex_transform >> reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT)
2157             & WINED3D_PSARGS_PROJECTED)
2158     {
2159         shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord);
2160         shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord);
2161         shader_addline(buffer, "ADD TA.xy, TA, TB;\n");
2162     } else {
2163         shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord);
2164     }
2165 
2166     shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", 0, NULL, NULL);
2167 
2168     if (ins->handler_idx == WINED3DSIH_TEXBEML)
2169     {
2170         /* No src swizzles are allowed, so this is ok */
2171         shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n",
2172                        src_reg, reg_dest_code, reg_dest_code);
2173         shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg);
2174     }
2175 }
2176 
2177 static void pshader_hw_texm3x2pad(const struct wined3d_shader_instruction *ins)
2178 {
2179     DWORD reg = ins->dst[0].reg.idx[0].offset;
2180     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2181     char src0_name[50], dst_name[50];
2182     BOOL is_color;
2183     struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
2184 
2185     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2186     /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized
2187      * T<reg+1> register. Use this register to store the calculated vector
2188      */
2189     tmp_reg.idx[0].offset = reg + 1;
2190     shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
2191     shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
2192 }
2193 
2194 static void pshader_hw_texm3x2tex(const struct wined3d_shader_instruction *ins)
2195 {
2196     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2197     DWORD flags;
2198     DWORD reg = ins->dst[0].reg.idx[0].offset;
2199     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2200     char dst_str[50];
2201     char src0_name[50];
2202     char dst_reg[50];
2203     BOOL is_color;
2204 
2205     /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */
2206     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
2207 
2208     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2209     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2210     shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
2211     flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2212     shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2213 }
2214 
2215 static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins)
2216 {
2217     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2218     DWORD reg = ins->dst[0].reg.idx[0].offset;
2219     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2220     char src0_name[50], dst_name[50];
2221     struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
2222     BOOL is_color;
2223 
2224     /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with
2225      * incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination
2226      * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared')
2227      */
2228     tmp_reg.idx[0].offset = reg + 2 - tex_mx->current_row;
2229     shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
2230 
2231     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2232     shader_addline(buffer, "DP3 %s.%c, fragment.texcoord[%u], %s;\n",
2233                    dst_name, 'x' + tex_mx->current_row, reg, src0_name);
2234     tex_mx->texcoord_w[tex_mx->current_row++] = reg;
2235 }
2236 
2237 static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins)
2238 {
2239     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2240     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2241     DWORD flags;
2242     DWORD reg = ins->dst[0].reg.idx[0].offset;
2243     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2244     char dst_str[50];
2245     char src0_name[50], dst_name[50];
2246     BOOL is_color;
2247 
2248     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2249     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2250     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
2251 
2252     /* Sample the texture using the calculated coordinates */
2253     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2254     flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2255     shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2256     tex_mx->current_row = 0;
2257 }
2258 
2259 static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins)
2260 {
2261     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2262     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2263     DWORD flags;
2264     DWORD reg = ins->dst[0].reg.idx[0].offset;
2265     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2266     char dst_str[50];
2267     char src0_name[50];
2268     char dst_reg[50];
2269     BOOL is_color;
2270 
2271     /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all
2272      * components for temporary data storage
2273      */
2274     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
2275     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2276     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
2277 
2278     /* Construct the eye-ray vector from w coordinates */
2279     shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[0]);
2280     shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[1]);
2281     shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg);
2282 
2283     /* Calculate reflection vector
2284      */
2285     shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg);
2286     /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
2287     shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg);
2288     shader_addline(buffer, "RCP TB.w, TB.w;\n");
2289     shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg);
2290     shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
2291     shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg);
2292 
2293     /* Sample the texture using the calculated coordinates */
2294     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2295     flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2296     shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2297     tex_mx->current_row = 0;
2298 }
2299 
2300 static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
2301 {
2302     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2303     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2304     DWORD flags;
2305     DWORD reg = ins->dst[0].reg.idx[0].offset;
2306     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2307     char dst_str[50];
2308     char src0_name[50];
2309     char src1_name[50];
2310     char dst_reg[50];
2311     BOOL is_color;
2312 
2313     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2314     shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name);
2315     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
2316     /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */
2317     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
2318 
2319     /* Calculate reflection vector.
2320      *
2321      *                   dot(N, E)
2322      * dst_reg.xyz = 2 * --------- * N - E
2323      *                   dot(N, N)
2324      *
2325      * Which normalizes the normal vector
2326      */
2327     shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name);
2328     shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg);
2329     shader_addline(buffer, "RCP TC.w, TC.w;\n");
2330     shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg);
2331     shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
2332     shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name);
2333 
2334     /* Sample the texture using the calculated coordinates */
2335     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2336     flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2337     shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2338     tex_mx->current_row = 0;
2339 }
2340 
2341 static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins)
2342 {
2343     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2344     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2345     char dst_name[50];
2346     const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
2347     const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
2348 
2349     /* texdepth has an implicit destination, the fragment depth value. It's only parameter,
2350      * which is essentially an input, is the destination register because it is the first
2351      * parameter. According to the msdn, this must be register r5, but let's keep it more flexible
2352      * here(writemasks/swizzles are not valid on texdepth)
2353      */
2354     shader_arb_get_dst_param(ins, dst, dst_name);
2355 
2356     /* According to the msdn, the source register(must be r5) is unusable after
2357      * the texdepth instruction, so we're free to modify it
2358      */
2359     shader_addline(buffer, "MIN %s.y, %s.y, %s;\n", dst_name, dst_name, one);
2360 
2361     /* How to deal with the special case dst_name.g == 0? if r != 0, then
2362      * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
2363      * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
2364      */
2365     shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
2366     shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name);
2367     shader_addline(buffer, "MIN TA.x, TA.x, %s;\n", one);
2368     shader_addline(buffer, "MAX result.depth, TA.x, %s;\n", zero);
2369 }
2370 
2371 /** Process the WINED3DSIO_TEXDP3TEX instruction in ARB:
2372  * Take a 3-component dot product of the TexCoord[dstreg] and src,
2373  * then perform a 1D texture lookup from stage dstregnum, place into dst. */
2374 static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins)
2375 {
2376     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2377     DWORD sampler_idx = ins->dst[0].reg.idx[0].offset;
2378     char src0[50];
2379     char dst_str[50];
2380 
2381     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2382     shader_addline(buffer, "MOV TB, 0.0;\n");
2383     shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0);
2384 
2385     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2386     shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL);
2387 }
2388 
2389 /** Process the WINED3DSIO_TEXDP3 instruction in ARB:
2390  * Take a 3-component dot product of the TexCoord[dstreg] and src. */
2391 static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins)
2392 {
2393     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2394     char src0[50];
2395     char dst_str[50];
2396     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2397 
2398     /* Handle output register */
2399     shader_arb_get_dst_param(ins, dst, dst_str);
2400     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2401     shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx[0].offset, src0);
2402 }
2403 
2404 /** Process the WINED3DSIO_TEXM3X3 instruction in ARB
2405  * Perform the 3rd row of a 3x3 matrix multiply */
2406 static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins)
2407 {
2408     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2409     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2410     char dst_str[50], dst_name[50];
2411     char src0[50];
2412     BOOL is_color;
2413 
2414     shader_arb_get_dst_param(ins, dst, dst_str);
2415     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2416     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2417     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0);
2418     shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name);
2419 }
2420 
2421 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB:
2422  * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
2423  * Calculate tmp0.y = TexCoord[dstreg] . src.xyz;  (tmp0.x has already been calculated)
2424  * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
2425  */
2426 static void pshader_hw_texm3x2depth(const struct wined3d_shader_instruction *ins)
2427 {
2428     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2429     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2430     char src0[50], dst_name[50];
2431     BOOL is_color;
2432     const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
2433     const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
2434 
2435     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2436     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2437     shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0);
2438 
2439     /* How to deal with the special case dst_name.g == 0? if r != 0, then
2440      * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
2441      * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
2442      */
2443     shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
2444     shader_addline(buffer, "MUL %s.x, %s.x, %s.y;\n", dst_name, dst_name, dst_name);
2445     shader_addline(buffer, "MIN %s.x, %s.x, %s;\n", dst_name, dst_name, one);
2446     shader_addline(buffer, "MAX result.depth, %s.x, %s;\n", dst_name, zero);
2447 }
2448 
2449 /** Handles transforming all WINED3DSIO_M?x? opcodes for
2450     Vertex/Pixel shaders to ARB_vertex_program codes */
2451 static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins)
2452 {
2453     int i;
2454     int nComponents = 0;
2455     struct wined3d_shader_dst_param tmp_dst = {{0}};
2456     struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
2457     struct wined3d_shader_instruction tmp_ins;
2458 
2459     memset(&tmp_ins, 0, sizeof(tmp_ins));
2460 
2461     /* Set constants for the temporary argument */
2462     tmp_ins.ctx = ins->ctx;
2463     tmp_ins.dst_count = 1;
2464     tmp_ins.dst = &tmp_dst;
2465     tmp_ins.src_count = 2;
2466     tmp_ins.src = tmp_src;
2467 
2468     switch(ins->handler_idx)
2469     {
2470         case WINED3DSIH_M4x4:
2471             nComponents = 4;
2472             tmp_ins.handler_idx = WINED3DSIH_DP4;
2473             break;
2474         case WINED3DSIH_M4x3:
2475             nComponents = 3;
2476             tmp_ins.handler_idx = WINED3DSIH_DP4;
2477             break;
2478         case WINED3DSIH_M3x4:
2479             nComponents = 4;
2480             tmp_ins.handler_idx = WINED3DSIH_DP3;
2481             break;
2482         case WINED3DSIH_M3x3:
2483             nComponents = 3;
2484             tmp_ins.handler_idx = WINED3DSIH_DP3;
2485             break;
2486         case WINED3DSIH_M3x2:
2487             nComponents = 2;
2488             tmp_ins.handler_idx = WINED3DSIH_DP3;
2489             break;
2490         default:
2491             FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
2492             break;
2493     }
2494 
2495     tmp_dst = ins->dst[0];
2496     tmp_src[0] = ins->src[0];
2497     tmp_src[1] = ins->src[1];
2498     for (i = 0; i < nComponents; ++i)
2499     {
2500         tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
2501         shader_hw_map2gl(&tmp_ins);
2502         ++tmp_src[1].reg.idx[0].offset;
2503     }
2504 }
2505 
2506 static DWORD abs_modifier(DWORD mod, BOOL *need_abs)
2507 {
2508     *need_abs = FALSE;
2509 
2510     switch(mod)
2511     {
2512         case WINED3DSPSM_NONE:      return WINED3DSPSM_ABS;
2513         case WINED3DSPSM_NEG:       return WINED3DSPSM_ABS;
2514         case WINED3DSPSM_BIAS:      *need_abs = TRUE; return WINED3DSPSM_BIAS;
2515         case WINED3DSPSM_BIASNEG:   *need_abs = TRUE; return WINED3DSPSM_BIASNEG;
2516         case WINED3DSPSM_SIGN:      *need_abs = TRUE; return WINED3DSPSM_SIGN;
2517         case WINED3DSPSM_SIGNNEG:   *need_abs = TRUE; return WINED3DSPSM_SIGNNEG;
2518         case WINED3DSPSM_COMP:      *need_abs = TRUE; return WINED3DSPSM_COMP;
2519         case WINED3DSPSM_X2:        *need_abs = TRUE; return WINED3DSPSM_X2;
2520         case WINED3DSPSM_X2NEG:     *need_abs = TRUE; return WINED3DSPSM_X2NEG;
2521         case WINED3DSPSM_DZ:        *need_abs = TRUE; return WINED3DSPSM_DZ;
2522         case WINED3DSPSM_DW:        *need_abs = TRUE; return WINED3DSPSM_DW;
2523         case WINED3DSPSM_ABS:       return WINED3DSPSM_ABS;
2524         case WINED3DSPSM_ABSNEG:    return WINED3DSPSM_ABS;
2525     }
2526     FIXME("Unknown modifier %u\n", mod);
2527     return mod;
2528 }
2529 
2530 static void shader_hw_scalar_op(const struct wined3d_shader_instruction *ins)
2531 {
2532     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2533     const char *instruction;
2534     struct wined3d_shader_src_param src0_copy = ins->src[0];
2535     BOOL need_abs = FALSE;
2536 
2537     char dst[50];
2538     char src[50];
2539 
2540     switch(ins->handler_idx)
2541     {
2542         case WINED3DSIH_RSQ:  instruction = "RSQ"; break;
2543         case WINED3DSIH_RCP:  instruction = "RCP"; break;
2544         case WINED3DSIH_EXPP:
2545             if (ins->ctx->reg_maps->shader_version.major < 2)
2546             {
2547                 instruction = "EXP";
2548                 break;
2549             }
2550             /* Drop through. */
2551         case WINED3DSIH_EXP:
2552             instruction = "EX2";
2553             break;
2554         case WINED3DSIH_LOG:
2555         case WINED3DSIH_LOGP:
2556             /* The precision requirements suggest that LOGP matches ARBvp's LOG
2557              * instruction, but notice that the output of those instructions is
2558              * different. */
2559             src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs);
2560             instruction = "LG2";
2561             break;
2562         default: instruction = "";
2563             FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
2564             break;
2565     }
2566 
2567     /* Dx sdk says .x is used if no swizzle is given, but our test shows that
2568      * .w is used. */
2569     src0_copy.swizzle = shader_arb_select_component(src0_copy.swizzle, 3);
2570 
2571     shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */
2572     shader_arb_get_src_param(ins, &src0_copy, 0, src);
2573 
2574     if(need_abs)
2575     {
2576         shader_addline(buffer, "ABS TA.w, %s;\n", src);
2577         shader_addline(buffer, "%s%s %s, TA.w;\n", instruction, shader_arb_get_modifier(ins), dst);
2578     }
2579     else
2580     {
2581         shader_addline(buffer, "%s%s %s, %s;\n", instruction, shader_arb_get_modifier(ins), dst, src);
2582     }
2583 
2584 }
2585 
/* Emit ARB code for the D3D nrm (normalize) instruction.
 *
 * One of three sequences is generated, selected by the shader type and
 * priv->target_version:
 *  - pixel shader with target >= NV3: DP3C + NRM + conditional MOV of zero
 *  - target >= NV2:                   DP3C + conditional RSQ + MUL
 *  - everything else:                 DP3 + SGE/MAD fixup + RSQ + MUL
 * All three use TA as scratch and are written so that a zero-length input
 * produces zero, as D3D requires. */
static void shader_hw_nrm(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
    char dst_name[50];
    char src_name[50];
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type);
    const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);

    shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
    /* TA is written below before the source is read again, so the source
     * must not live in TA; request temporary 1 (TB) instead. */
    shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name);

    /* In D3D, NRM of a vector with length zero returns zero. Catch this situation, as
     * otherwise NRM or RSQ would return NaN */
    if(pshader && priv->target_version >= NV3)
    {
        /* GL_NV_fragment_program2's NRM needs protection against length zero vectors too
         *
         * TODO: Find out if DP3+NRM+MOV is really faster than DP3+RSQ+MUL
         */
        /* DP3C sets the condition code from the squared length; the trailing
         * MOV only takes effect where that result was equal to zero. */
        shader_addline(buffer, "DP3C TA, %s, %s;\n", src_name, src_name);
        shader_addline(buffer, "NRM%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name);
        shader_addline(buffer, "MOV %s (EQ), %s;\n", dst_name, zero);
    }
    else if(priv->target_version >= NV2)
    {
        /* RSQ is only executed where the squared length is non-zero; otherwise
         * TA.x keeps the zero written by DP3C and the MUL yields zero. */
        shader_addline(buffer, "DP3C TA.x, %s, %s;\n", src_name, src_name);
        shader_addline(buffer, "RSQ TA.x (NE), TA.x;\n");
        shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name,
                       src_name);
    }
    else
    {
        const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);

        shader_addline(buffer, "DP3 TA.x, %s, %s;\n", src_name, src_name);
        /* Pass any non-zero value to RSQ if the input vector has a length of zero. The
         * RSQ result doesn't matter, as long as multiplying it by 0 returns 0.
         */
        /* TA.y = (-TA.x >= 0), i.e. 1.0 exactly when the squared length is 0;
         * the MAD then adds that 1.0 to TA.x. */
        shader_addline(buffer, "SGE TA.y, -TA.x, %s;\n", zero);
        shader_addline(buffer, "MAD TA.x, %s, TA.y, TA.x;\n", one);

        shader_addline(buffer, "RSQ TA.x, TA.x;\n");
        /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/
        shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name,
                    src_name);
    }
}
2634 
2635 static void shader_hw_lrp(const struct wined3d_shader_instruction *ins)
2636 {
2637     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2638     char dst_name[50];
2639     char src_name[3][50];
2640 
2641     /* ARB_fragment_program has a convenient LRP instruction */
2642     if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
2643         shader_hw_map2gl(ins);
2644         return;
2645     }
2646 
2647     shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
2648     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
2649     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
2650     shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
2651 
2652     shader_addline(buffer, "SUB TA, %s, %s;\n", src_name[1], src_name[2]);
2653     shader_addline(buffer, "MAD%s %s, %s, TA, %s;\n", shader_arb_get_modifier(ins),
2654                    dst_name, src_name[0], src_name[2]);
2655 }
2656 
/* Emit ARB code for the D3D sincos instruction (cosine in dst.x, sine in
 * dst.y, as selected by the write mask).
 *
 * Three code paths exist:
 *  - pixel shaders use the SCS instruction, plus a MOV_SAT if the
 *    destination carries the saturate modifier
 *  - other shaders with target_version >= NV2 use COS and SIN
 *  - otherwise sine and cosine are approximated with a Taylor series using
 *    the constants supplied in src1 and src2 */
static void shader_hw_sincos(const struct wined3d_shader_instruction *ins)
{
    /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which
     * must contain fixed constants. So we need a separate function to filter those constants and
     * can't use map2gl
     */
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
    char dst_name[50];
    char src_name0[50], src_name1[50], src_name2[50];
    BOOL is_color;

    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
    if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
        shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
        /* No modifiers are supported on SCS */
        shader_addline(buffer, "SCS %s, %s;\n", dst_name, src_name0);

        if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE)
        {
            /* src_name0 is no longer needed and is reused for the bare
             * destination register name, which is then saturated in place. */
            shader_arb_get_register_name(ins, &dst->reg, src_name0, &is_color);
            shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name0);
        }
    } else if(priv->target_version >= NV2) {
        shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);

        /* Sincos writemask must be .x, .y or .xy */
        if(dst->write_mask & WINED3DSP_WRITEMASK_0)
            shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
        if(dst->write_mask & WINED3DSP_WRITEMASK_1)
            shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
    } else {
        /* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8
         * helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2.
         *
         * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
         * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ...
         *
         * The constants we get are:
         *
         *  +1   +1,     -1     -1     +1      +1      -1       -1
         *      ---- ,  ---- , ---- , ----- , ----- , ----- , ------
         *      1!*2    2!*4   3!*8   4!*16   5!*32   6!*64   7!*128
         *
         * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2):
         *
         * (x/2)^2 = x^2 / 4
         * (x/2)^3 = x^3 / 8
         * (x/2)^4 = x^4 / 16
         * (x/2)^5 = x^5 / 32
         * etc
         *
         * To get the final result:
         * sin(x) = 2 * sin(x/2) * cos(x/2)
         * cos(x) = cos(x/2)^2 - sin(x/2)^2
         * (from sin(x+y) and cos(x+y) rules)
         *
         * As per MSDN, dst.z is undefined after the operation, and so is
         * dst.x and dst.y if they're masked out by the writemask. Ie
         * sincos dst.y, src1, c0, c1
         * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler
         * vsa.exe also stops with an error if the dest register is the same register as the source
         * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also
         * indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0).
         */
        shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
        shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2);
        shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);

        /* Powers of x: even powers go to dst.xyz, odd powers to TA.yzw. */
        shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0);  /* x ^ 2 */
        shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0);           /* x ^ 3 */
        shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0);           /* x ^ 4 */
        shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0);           /* x ^ 5 */
        shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0);           /* x ^ 6 */
        shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0);           /* x ^ 7 */

        /* sin(x/2)
         *
         * Unfortunately we don't get the constants in a DP4-capable form. Is there a way to
         * properly merge that with MULs in the code above?
         * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe
         * we can merge the sine and cosine MAD rows to calculate them together.
         */
        /* The sum is accumulated in TA.x. */
        shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */
        shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */
        shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */
        shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */

        /* cos(x/2) */
        /* The sum is accumulated in TA.y; x^3 stored there earlier is no longer needed. */
        shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */
        shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */
        shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */

        if(dst->write_mask & WINED3DSP_WRITEMASK_0) {
            /* cos x */
            /* cos(x) = cos(x/2)^2 - sin(x/2)^2 */
            shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n");
            shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name);
        }
        if(dst->write_mask & WINED3DSP_WRITEMASK_1) {
            /* sin x */
            /* sin(x) = 2 * sin(x/2) * cos(x/2); the doubling is done by adding the product to itself. */
            shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name);
            shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name);
        }
    }
}
2763 
2764 static void shader_hw_sgn(const struct wined3d_shader_instruction *ins)
2765 {
2766     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2767     char dst_name[50];
2768     char src_name[50];
2769     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
2770 
2771     shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
2772     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);
2773 
2774     /* SGN is only valid in vertex shaders */
2775     if(ctx->target_version >= NV2) {
2776         shader_addline(buffer, "SSG%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name);
2777         return;
2778     }
2779 
2780     /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false.
2781      * if SRC < 0.0,  SRC < -SRC = TRUE. If neither is true, src = 0.0
2782      */
2783     if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) {
2784         shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name);
2785     } else {
2786         /* src contains TA? Write to the dest first. This won't overwrite our destination.
2787          * Then use TA, and calculate the final result
2788          *
2789          * Not reading from TA? Store the first result in TA to avoid overwriting the
2790          * destination if src reg = dst reg
2791          */
2792         if(strstr(src_name, "TA"))
2793         {
2794             shader_addline(buffer, "SLT %s,  %s, -%s;\n", dst_name, src_name, src_name);
2795             shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name);
2796             shader_addline(buffer, "ADD %s, %s, -TA;\n", dst_name, dst_name);
2797         }
2798         else
2799         {
2800             shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name);
2801             shader_addline(buffer, "SLT %s,  %s, -%s;\n", dst_name, src_name, src_name);
2802             shader_addline(buffer, "ADD %s, TA, -%s;\n", dst_name, dst_name);
2803         }
2804     }
2805 }
2806 
2807 static void shader_hw_dsy(const struct wined3d_shader_instruction *ins)
2808 {
2809     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2810     char src[50];
2811     char dst[50];
2812     char dst_name[50];
2813     BOOL is_color;
2814 
2815     shader_arb_get_dst_param(ins, &ins->dst[0], dst);
2816     shader_arb_get_src_param(ins, &ins->src[0], 0, src);
2817     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2818 
2819     shader_addline(buffer, "DDY %s, %s;\n", dst, src);
2820     shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst, dst_name);
2821 }
2822 
/* Emit ARB code for the D3D pow instruction: dst = abs(src0) ^ src1.
 *
 * The base is placed in TA.x (with an explicit ABS when the source modifier
 * cannot express the absolute value). Both code paths then special-case an
 * exponent of zero so that the result is 1.0 in that case:
 *  - target_version >= NV2: MOVC sets the condition code from the exponent
 *    and a conditional MOV overrides the POW result where it is zero
 *  - otherwise: a 0/1 flag computed with ABS + SGE is used to blend between
 *    the POW result and 1.0 */
static void shader_hw_pow(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
    char src0[50], src1[50], dst[50];
    struct wined3d_shader_src_param src0_copy = ins->src[0];
    BOOL need_abs = FALSE;
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);

    /* POW operates on the absolute value of the input */
    src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs);

    shader_arb_get_dst_param(ins, &ins->dst[0], dst);
    shader_arb_get_src_param(ins, &src0_copy, 0, src0);
    shader_arb_get_src_param(ins, &ins->src[1], 1, src1);

    /* TA.x = base */
    if (need_abs)
        shader_addline(buffer, "ABS TA.x, %s;\n", src0);
    else
        shader_addline(buffer, "MOV TA.x, %s;\n", src0);

    if (priv->target_version >= NV2)
    {
        /* TA.y = exponent, with the condition code updated from it. */
        shader_addline(buffer, "MOVC TA.y, %s;\n", src1);
        shader_addline(buffer, "POW%s %s, TA.x, TA.y;\n", shader_arb_get_modifier(ins), dst);
        /* Exponent == 0: force the result to one. */
        shader_addline(buffer, "MOV %s (EQ.y), %s;\n", dst, one);
    }
    else
    {
        const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
        const char *flt_eps = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_EPS);

        /* TA.y = (-abs(exponent) >= 0), i.e. 1.0 exactly when the exponent is zero. */
        shader_addline(buffer, "ABS TA.y, %s;\n", src1);
        shader_addline(buffer, "SGE TA.y, -TA.y, %s;\n", zero);
        /* Possibly add flt_eps to avoid getting float special values */
        shader_addline(buffer, "MAD TA.z, TA.y, %s, %s;\n", flt_eps, src1);
        shader_addline(buffer, "POW%s TA.x, TA.x, TA.z;\n", shader_arb_get_modifier(ins));
        /* TA.x = pow * (1 - flag); dst = flag * one + TA.x */
        shader_addline(buffer, "MAD TA.x, -TA.x, TA.y, TA.x;\n");
        shader_addline(buffer, "MAD %s, TA.y, %s, TA.x;\n", dst, one);
    }
}
2864 
2865 static void shader_hw_loop(const struct wined3d_shader_instruction *ins)
2866 {
2867     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2868     char src_name[50];
2869     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2870 
2871     /* src0 is aL */
2872     shader_arb_get_src_param(ins, &ins->src[1], 0, src_name);
2873 
2874     if(vshader)
2875     {
2876         struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2877         struct list *e = list_head(&priv->control_frames);
2878         struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
2879 
2880         if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n");
2881         /* The constant loader makes sure to load -1 into iX.w */
2882         shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name);
2883         shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop);
2884         shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop);
2885     }
2886     else
2887     {
2888         shader_addline(buffer, "LOOP %s;\n", src_name);
2889     }
2890 }
2891 
2892 static void shader_hw_rep(const struct wined3d_shader_instruction *ins)
2893 {
2894     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2895     char src_name[50];
2896     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2897 
2898     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);
2899 
2900     /* The constant loader makes sure to load -1 into iX.w */
2901     if(vshader)
2902     {
2903         struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2904         struct list *e = list_head(&priv->control_frames);
2905         struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
2906 
2907         if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n");
2908 
2909         shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name);
2910         shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop);
2911         shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop);
2912     }
2913     else
2914     {
2915         shader_addline(buffer, "REP %s;\n", src_name);
2916     }
2917 }
2918 
2919 static void shader_hw_endloop(const struct wined3d_shader_instruction *ins)
2920 {
2921     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2922     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2923 
2924     if(vshader)
2925     {
2926         struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2927         struct list *e = list_head(&priv->control_frames);
2928         struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
2929 
2930         shader_addline(buffer, "ARAC aL.xy, aL;\n");
2931         shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop);
2932         shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop);
2933 
2934         if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n");
2935     }
2936     else
2937     {
2938         shader_addline(buffer, "ENDLOOP;\n");
2939     }
2940 }
2941 
2942 static void shader_hw_endrep(const struct wined3d_shader_instruction *ins)
2943 {
2944     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2945     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2946 
2947     if(vshader)
2948     {
2949         struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2950         struct list *e = list_head(&priv->control_frames);
2951         struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
2952 
2953         shader_addline(buffer, "ARAC aL.xy, aL;\n");
2954         shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop);
2955         shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop);
2956 
2957         if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n");
2958     }
2959     else
2960     {
2961         shader_addline(buffer, "ENDREP;\n");
2962     }
2963 }
2964 
2965 static const struct control_frame *find_last_loop(const struct shader_arb_ctx_priv *priv)
2966 {
2967     struct control_frame *control_frame;
2968 
2969     LIST_FOR_EACH_ENTRY(control_frame, &priv->control_frames, struct control_frame, entry)
2970     {
2971         if(control_frame->type == LOOP || control_frame->type == REP) return control_frame;
2972     }
2973     ERR("Could not find loop for break\n");
2974     return NULL;
2975 }
2976 
2977 static void shader_hw_break(const struct wined3d_shader_instruction *ins)
2978 {
2979     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2980     const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data);
2981     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2982 
2983     if(vshader)
2984     {
2985         shader_addline(buffer, "BRA loop_%u_end;\n", control_frame->no.loop);
2986     }
2987     else
2988     {
2989         shader_addline(buffer, "BRK;\n");
2990     }
2991 }
2992 
2993 static const char *get_compare(enum wined3d_shader_rel_op op)
2994 {
2995     switch (op)
2996     {
2997         case WINED3D_SHADER_REL_OP_GT: return "GT";
2998         case WINED3D_SHADER_REL_OP_EQ: return "EQ";
2999         case WINED3D_SHADER_REL_OP_GE: return "GE";
3000         case WINED3D_SHADER_REL_OP_LT: return "LT";
3001         case WINED3D_SHADER_REL_OP_NE: return "NE";
3002         case WINED3D_SHADER_REL_OP_LE: return "LE";
3003         default:
3004             FIXME("Unrecognized operator %#x.\n", op);
3005             return "(\?\?)";
3006     }
3007 }
3008 
3009 static enum wined3d_shader_rel_op invert_compare(enum wined3d_shader_rel_op op)
3010 {
3011     switch (op)
3012     {
3013         case WINED3D_SHADER_REL_OP_GT: return WINED3D_SHADER_REL_OP_LE;
3014         case WINED3D_SHADER_REL_OP_EQ: return WINED3D_SHADER_REL_OP_NE;
3015         case WINED3D_SHADER_REL_OP_GE: return WINED3D_SHADER_REL_OP_LT;
3016         case WINED3D_SHADER_REL_OP_LT: return WINED3D_SHADER_REL_OP_GE;
3017         case WINED3D_SHADER_REL_OP_NE: return WINED3D_SHADER_REL_OP_EQ;
3018         case WINED3D_SHADER_REL_OP_LE: return WINED3D_SHADER_REL_OP_GT;
3019         default:
3020             FIXME("Unrecognized operator %#x.\n", op);
3021             return -1;
3022     }
3023 }
3024 
3025 static void shader_hw_breakc(const struct wined3d_shader_instruction *ins)
3026 {
3027     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3028     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3029     const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data);
3030     char src_name0[50];
3031     char src_name1[50];
3032     const char *comp = get_compare(ins->flags);
3033 
3034     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
3035     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
3036 
3037     if(vshader)
3038     {
3039         /* SUBC CC, src0, src1" works only in pixel shaders, so use TA to throw
3040          * away the subtraction result
3041          */
3042         shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3043         shader_addline(buffer, "BRA loop_%u_end (%s.x);\n", control_frame->no.loop, comp);
3044     }
3045     else
3046     {
3047         shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3048         shader_addline(buffer, "BRK (%s.x);\n", comp);
3049     }
3050 }
3051 
3052 static void shader_hw_ifc(const struct wined3d_shader_instruction *ins)
3053 {
3054     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3055     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3056     struct list *e = list_head(&priv->control_frames);
3057     struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
3058     const char *comp;
3059     char src_name0[50];
3060     char src_name1[50];
3061     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3062 
3063     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
3064     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
3065 
3066     if(vshader)
3067     {
3068         /* Invert the flag. We jump to the else label if the condition is NOT true */
3069         comp = get_compare(invert_compare(ins->flags));
3070         shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3071         shader_addline(buffer, "BRA ifc_%u_else (%s.x);\n", control_frame->no.ifc, comp);
3072     }
3073     else
3074     {
3075         comp = get_compare(ins->flags);
3076         shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3077         shader_addline(buffer, "IF %s.x;\n", comp);
3078     }
3079 }
3080 
3081 static void shader_hw_else(const struct wined3d_shader_instruction *ins)
3082 {
3083     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3084     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3085     struct list *e = list_head(&priv->control_frames);
3086     struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
3087     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3088 
3089     if(vshader)
3090     {
3091         shader_addline(buffer, "BRA ifc_%u_endif;\n", control_frame->no.ifc);
3092         shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc);
3093         control_frame->had_else = TRUE;
3094     }
3095     else
3096     {
3097         shader_addline(buffer, "ELSE;\n");
3098     }
3099 }
3100 
3101 static void shader_hw_endif(const struct wined3d_shader_instruction *ins)
3102 {
3103     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3104     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3105     struct list *e = list_head(&priv->control_frames);
3106     struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
3107     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3108 
3109     if(vshader)
3110     {
3111         if(control_frame->had_else)
3112         {
3113             shader_addline(buffer, "ifc_%u_endif:\n", control_frame->no.ifc);
3114         }
3115         else
3116         {
3117             shader_addline(buffer, "#No else branch. else is endif\n");
3118             shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc);
3119         }
3120     }
3121     else
3122     {
3123         shader_addline(buffer, "ENDIF;\n");
3124     }
3125 }
3126 
3127 static void shader_hw_texldd(const struct wined3d_shader_instruction *ins)
3128 {
3129     DWORD sampler_idx = ins->src[1].reg.idx[0].offset;
3130     char reg_dest[40];
3131     char reg_src[3][40];
3132     WORD flags = TEX_DERIV;
3133 
3134     shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest);
3135     shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src[0]);
3136     shader_arb_get_src_param(ins, &ins->src[2], 1, reg_src[1]);
3137     shader_arb_get_src_param(ins, &ins->src[3], 2, reg_src[2]);
3138 
3139     if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ;
3140     if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS;
3141 
3142     shader_hw_sample(ins, sampler_idx, reg_dest, reg_src[0], flags, reg_src[1], reg_src[2]);
3143 }
3144 
3145 static void shader_hw_texldl(const struct wined3d_shader_instruction *ins)
3146 {
3147     DWORD sampler_idx = ins->src[1].reg.idx[0].offset;
3148     char reg_dest[40];
3149     char reg_coord[40];
3150     WORD flags = TEX_LOD;
3151 
3152     shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest);
3153     shader_arb_get_src_param(ins, &ins->src[0], 0, reg_coord);
3154 
3155     if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ;
3156     if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS;
3157 
3158     shader_hw_sample(ins, sampler_idx, reg_dest, reg_coord, flags, NULL, NULL);
3159 }
3160 
3161 static void shader_hw_label(const struct wined3d_shader_instruction *ins)
3162 {
3163     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3164     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3165 
3166     priv->in_main_func = FALSE;
3167     /* Call instructions activate the NV extensions, not labels and rets. If there is an uncalled
3168      * subroutine, don't generate a label that will make GL complain
3169      */
3170     if(priv->target_version == ARB) return;
3171 
3172     shader_addline(buffer, "l%u:\n", ins->src[0].reg.idx[0].offset);
3173 }
3174 
3175 static void vshader_add_footer(struct shader_arb_ctx_priv *priv_ctx,
3176         const struct arb_vshader_private *shader_data, const struct arb_vs_compile_args *args,
3177         const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info,
3178         struct wined3d_string_buffer *buffer)
3179 {
3180     unsigned int i;
3181 
3182     /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
3183      * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
3184      * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
3185      * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
3186      */
3187     if (args->super.fog_src == VS_FOG_Z)
3188     {
3189         shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
3190     }
3191     else
3192     {
3193         if (!reg_maps->fog)
3194         {
3195             /* posFixup.x is always 1.0, so we can safely use it */
3196             shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n");
3197         }
3198         else
3199         {
3200             /* Clamp fogcoord */
3201             const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO);
3202             const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE);
3203 
3204             shader_addline(buffer, "MIN TMP_FOGCOORD.x, TMP_FOGCOORD.x, %s;\n", one);
3205             shader_addline(buffer, "MAX result.fogcoord.x, TMP_FOGCOORD.x, %s;\n", zero);
3206         }
3207     }
3208 
3209     /* Clipplanes are always stored without y inversion */
3210     if (use_nv_clip(gl_info) && priv_ctx->target_version >= NV2)
3211     {
3212         if (args->super.clip_enabled)
3213         {
3214             for (i = 0; i < priv_ctx->vs_clipplanes; i++)
3215             {
3216                 shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i);
3217             }
3218         }
3219     }
3220     else if (args->clip.boolclip.clip_texcoord)
3221     {
3222         static const char component[4] = {'x', 'y', 'z', 'w'};
3223         unsigned int cur_clip = 0;
3224         const char *zero = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ZERO);
3225 
3226         for (i = 0; i < gl_info->limits.user_clip_distances; ++i)
3227         {
3228             if (args->clip.boolclip.clipplane_mask & (1u << i))
3229             {
3230                 shader_addline(buffer, "DP4 TA.%c, TMP_OUT, state.clip[%u].plane;\n",
3231                                component[cur_clip++], i);
3232             }
3233         }
3234         switch (cur_clip)
3235         {
3236             case 0:
3237                 shader_addline(buffer, "MOV TA, %s;\n", zero);
3238                 break;
3239             case 1:
3240                 shader_addline(buffer, "MOV TA.yzw, %s;\n", zero);
3241                 break;
3242             case 2:
3243                 shader_addline(buffer, "MOV TA.zw, %s;\n", zero);
3244                 break;
3245             case 3:
3246                 shader_addline(buffer, "MOV TA.w, %s;\n", zero);
3247                 break;
3248         }
3249         shader_addline(buffer, "MOV result.texcoord[%u], TA;\n",
3250                        args->clip.boolclip.clip_texcoord - 1);
3251     }
3252 
3253     /* Write the final position.
3254      *
3255      * OpenGL coordinates specify the center of the pixel while d3d coords specify
3256      * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
3257      * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
3258      * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
3259      */
3260     if (!gl_info->supported[ARB_CLIP_CONTROL])
3261     {
3262         shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n");
3263         shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n");
3264         shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n");
3265 
3266         /* Z coord [0;1]->[-1;1] mapping, see comment in
3267          * get_projection_matrix() in utils.c. */
3268         if (need_helper_const(shader_data, reg_maps, gl_info))
3269         {
3270             const char *two = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_TWO);
3271             shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, %s, -TMP_OUT.w;\n", two);
3272         }
3273         else
3274         {
3275             shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n");
3276             shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n");
3277         }
3278     }
3279 
3280     shader_addline(buffer, "MOV result.position, TMP_OUT;\n");
3281 
3282     priv_ctx->footer_written = TRUE;
3283 }
3284 
3285 static void shader_hw_ret(const struct wined3d_shader_instruction *ins)
3286 {
3287     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3288     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3289     const struct wined3d_shader *shader = ins->ctx->shader;
3290     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3291 
3292     if(priv->target_version == ARB) return;
3293 
3294     if(vshader)
3295     {
3296         if (priv->in_main_func) vshader_add_footer(priv, shader->backend_data,
3297                 priv->cur_vs_args, ins->ctx->reg_maps, ins->ctx->gl_info, buffer);
3298     }
3299 
3300     shader_addline(buffer, "RET;\n");
3301 }
3302 
3303 static void shader_hw_call(const struct wined3d_shader_instruction *ins)
3304 {
3305     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3306     shader_addline(buffer, "CAL l%u;\n", ins->src[0].reg.idx[0].offset);
3307 }
3308 
3309 static BOOL shader_arb_compile(const struct wined3d_gl_info *gl_info, GLenum target, const char *src)
3310 {
3311     const char *ptr, *line;
3312     GLint native, pos;
3313 
3314     if (TRACE_ON(d3d_shader))
3315     {
3316         ptr = src;
3317         while ((line = get_line(&ptr))) TRACE_(d3d_shader)("    %.*s", (int)(ptr - line), line);
3318     }
3319 
3320     GL_EXTCALL(glProgramStringARB(target, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(src), src));
3321     checkGLcall("glProgramStringARB()");
3322 
3323     if (FIXME_ON(d3d_shader))
3324     {
3325         gl_info->gl_ops.gl.p_glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
3326         if (pos != -1)
3327         {
3328             FIXME_(d3d_shader)("Program error at position %d: %s\n\n", pos,
3329                     debugstr_a((const char *)gl_info->gl_ops.gl.p_glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
3330             ptr = src;
3331             while ((line = get_line(&ptr))) FIXME_(d3d_shader)("    %.*s", (int)(ptr - line), line);
3332             FIXME_(d3d_shader)("\n");
3333 
3334             return FALSE;
3335         }
3336     }
3337 
3338     if (WARN_ON(d3d_perf))
3339     {
3340         GL_EXTCALL(glGetProgramivARB(target, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
3341         checkGLcall("glGetProgramivARB()");
3342         if (!native)
3343             WARN_(d3d_perf)("Program exceeds native resource limits.\n");
3344     }
3345 
3346     return TRUE;
3347 }
3348 
3349 static void arbfp_add_sRGB_correction(struct wined3d_string_buffer *buffer, const char *fragcolor,
3350         const char *tmp1, const char *tmp2, const char *tmp3, const char *tmp4, BOOL condcode)
3351 {
3352     /* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */
3353 
3354     if(condcode)
3355     {
3356         /* Sigh. MOVC CC doesn't work, so use one of the temps as dummy dest */
3357         shader_addline(buffer, "SUBC %s, %s.x, srgb_consts1.x;\n", tmp1, fragcolor);
3358         /* Calculate the > 0.0031308 case */
3359         shader_addline(buffer, "POW %s.x (GE), %s.x, srgb_consts0.x;\n", fragcolor, fragcolor);
3360         shader_addline(buffer, "POW %s.y (GE), %s.y, srgb_consts0.x;\n", fragcolor, fragcolor);
3361         shader_addline(buffer, "POW %s.z (GE), %s.z, srgb_consts0.x;\n", fragcolor, fragcolor);
3362         shader_addline(buffer, "MUL %s.xyz (GE), %s, srgb_consts0.y;\n", fragcolor, fragcolor);
3363         shader_addline(buffer, "SUB %s.xyz (GE), %s, srgb_consts0.z;\n", fragcolor, fragcolor);
3364         /* Calculate the < case */
3365         shader_addline(buffer, "MUL %s.xyz (LT), srgb_consts0.w, %s;\n", fragcolor, fragcolor);
3366     }
3367     else
3368     {
3369         /* Calculate the > 0.0031308 case */
3370         shader_addline(buffer, "POW %s.x, %s.x, srgb_consts0.x;\n", tmp1, fragcolor);
3371         shader_addline(buffer, "POW %s.y, %s.y, srgb_consts0.x;\n", tmp1, fragcolor);
3372         shader_addline(buffer, "POW %s.z, %s.z, srgb_consts0.x;\n", tmp1, fragcolor);
3373         shader_addline(buffer, "MUL %s, %s, srgb_consts0.y;\n", tmp1, tmp1);
3374         shader_addline(buffer, "SUB %s, %s, srgb_consts0.z;\n", tmp1, tmp1);
3375         /* Calculate the < case */
3376         shader_addline(buffer, "MUL %s, srgb_consts0.w, %s;\n", tmp2, fragcolor);
3377         /* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */
3378         shader_addline(buffer, "SLT %s, srgb_consts1.x, %s;\n", tmp3, fragcolor);
3379         shader_addline(buffer, "SGE %s, srgb_consts1.x, %s;\n", tmp4, fragcolor);
3380         /* Store the components > 0.0031308 in the destination */
3381         shader_addline(buffer, "MUL %s.xyz, %s, %s;\n", fragcolor, tmp1, tmp3);
3382         /* Add the components that are < 0.0031308 */
3383         shader_addline(buffer, "MAD %s.xyz, %s, %s, %s;\n", fragcolor, tmp2, tmp4, fragcolor);
3384         /* Move everything into result.color at once. Nvidia hardware cannot handle partial
3385         * result.color writes(.rgb first, then .a), or handle overwriting already written
3386         * components. The assembler uses a temporary register in this case, which is usually
3387         * not allocated from one of our registers that were used earlier.
3388         */
3389     }
3390     /* [0.0;1.0] clamping. Not needed, this is done implicitly */
3391 }
3392 
3393 static const DWORD *find_loop_control_values(const struct wined3d_shader *shader, DWORD idx)
3394 {
3395     const struct wined3d_shader_lconst *constant;
3396 
3397     LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
3398     {
3399         if (constant->idx == idx)
3400         {
3401             return constant->value;
3402         }
3403     }
3404     return NULL;
3405 }
3406 
3407 static void init_ps_input(const struct wined3d_shader *shader,
3408         const struct arb_ps_compile_args *args, struct shader_arb_ctx_priv *priv)
3409 {
3410     static const char * const texcoords[8] =
3411     {
3412         "fragment.texcoord[0]", "fragment.texcoord[1]", "fragment.texcoord[2]", "fragment.texcoord[3]",
3413         "fragment.texcoord[4]", "fragment.texcoord[5]", "fragment.texcoord[6]", "fragment.texcoord[7]"
3414     };
3415     unsigned int i;
3416     const struct wined3d_shader_signature_element *input;
3417     const char *semantic_name;
3418     DWORD semantic_idx;
3419 
3420     switch(args->super.vp_mode)
3421     {
3422         case pretransformed:
3423         case fixedfunction:
3424             /* The pixelshader has to collect the varyings on its own. In any case properly load
3425              * color0 and color1. In the case of pretransformed vertices also load texcoords. Set
3426              * other attribs to 0.0.
3427              *
3428              * For fixedfunction this behavior is correct, according to the tests. For pretransformed
3429              * we'd either need a replacement shader that can load other attribs like BINORMAL, or
3430              * load the texcoord attrib pointers to match the pixel shader signature
3431              */
3432             for (i = 0; i < shader->input_signature.element_count; ++i)
3433             {
3434                 input = &shader->input_signature.elements[i];
3435                 if (!(semantic_name = input->semantic_name))
3436                     continue;
3437                 semantic_idx = input->semantic_idx;
3438 
3439                 if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR))
3440                 {
3441                     if (!semantic_idx)
3442                         priv->ps_input[input->register_idx] = "fragment.color.primary";
3443                     else if (semantic_idx == 1)
3444                         priv->ps_input[input->register_idx] = "fragment.color.secondary";
3445                     else
3446                         priv->ps_input[input->register_idx] = "0.0";
3447                 }
3448                 else if (args->super.vp_mode == fixedfunction)
3449                 {
3450                     priv->ps_input[input->register_idx] = "0.0";
3451                 }
3452                 else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
3453                 {
3454                     if (semantic_idx < 8)
3455                         priv->ps_input[input->register_idx] = texcoords[semantic_idx];
3456                     else
3457                         priv->ps_input[input->register_idx] = "0.0";
3458                 }
3459                 else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_FOG))
3460                 {
3461                     if (!semantic_idx)
3462                         priv->ps_input[input->register_idx] = "fragment.fogcoord";
3463                     else
3464                         priv->ps_input[input->register_idx] = "0.0";
3465                 }
3466                 else
3467                 {
3468                     priv->ps_input[input->register_idx] = "0.0";
3469                 }
3470 
3471                 TRACE("v%u, semantic %s%u is %s\n", input->register_idx,
3472                         semantic_name, semantic_idx, priv->ps_input[input->register_idx]);
3473             }
3474             break;
3475 
3476         case vertexshader:
3477             /* That one is easy. The vertex shaders provide v0-v7 in fragment.texcoord and v8 and v9 in
3478              * fragment.color
3479              */
3480             for(i = 0; i < 8; i++)
3481             {
3482                 priv->ps_input[i] = texcoords[i];
3483             }
3484             priv->ps_input[8] = "fragment.color.primary";
3485             priv->ps_input[9] = "fragment.color.secondary";
3486             break;
3487     }
3488 }
3489 
/* Emit linear fog blending done in the program itself.
 *
 * tmp.x receives the saturated value of
 * (state.fog.params.z - fragment.fogcoord.x) * state.fog.params.w, which
 * is then used as LRP weight between fragcolor and state.fog.color. Only
 * the rgb components of fragcolor are written. */
static void arbfp_add_linear_fog(struct wined3d_string_buffer *buffer,
        const char *fragcolor, const char *tmp)
{
    /* Fog factor into tmp.x. */
    shader_addline(buffer,
            "SUB %s.x, state.fog.params.z, fragment.fogcoord.x;\n", tmp);
    shader_addline(buffer,
            "MUL_SAT %s.x, %s.x, state.fog.params.w;\n", tmp, tmp);

    /* Blend the fragment colour with the fog colour. */
    shader_addline(buffer,
            "LRP %s.rgb, %s.x, %s, state.fog.color;\n", fragcolor, tmp, fragcolor);
}
3497 
3498 /* Context activation is done by the caller. */
3499 static GLuint shader_arb_generate_pshader(const struct wined3d_shader *shader,
3500         const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer,
3501         const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled)
3502 {
3503     const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
3504     GLuint retval;
3505     char fragcolor[16];
3506     DWORD next_local = 0;
3507     struct shader_arb_ctx_priv priv_ctx;
3508     BOOL dcl_td = FALSE;
3509     BOOL want_nv_prog = FALSE;
3510     struct arb_pshader_private *shader_priv = shader->backend_data;
3511     DWORD map;
3512     BOOL custom_linear_fog = FALSE;
3513 
3514     char srgbtmp[4][4];
3515     char ftoa_tmp[17];
3516     unsigned int i, found = 0;
3517 
3518     for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
3519     {
3520         if (!(map & 1)
3521                 || (shader->u.ps.color0_mov && i == shader->u.ps.color0_reg)
3522                 || (reg_maps->shader_version.major < 2 && !i))
3523             continue;
3524 
3525         sprintf(srgbtmp[found], "R%u", i);
3526         ++found;
3527         if (found == 4) break;
3528     }
3529 
3530     switch(found) {
3531         case 0:
3532             sprintf(srgbtmp[0], "TA");
3533             sprintf(srgbtmp[1], "TB");
3534             sprintf(srgbtmp[2], "TC");
3535             sprintf(srgbtmp[3], "TD");
3536             dcl_td = TRUE;
3537             break;
3538         case 1:
3539             sprintf(srgbtmp[1], "TA");
3540             sprintf(srgbtmp[2], "TB");
3541             sprintf(srgbtmp[3], "TC");
3542             break;
3543         case 2:
3544             sprintf(srgbtmp[2], "TA");
3545             sprintf(srgbtmp[3], "TB");
3546             break;
3547         case 3:
3548             sprintf(srgbtmp[3], "TA");
3549             break;
3550         case 4:
3551             break;
3552     }
3553 
3554     /*  Create the hw ARB shader */
3555     memset(&priv_ctx, 0, sizeof(priv_ctx));
3556     priv_ctx.cur_ps_args = args;
3557     priv_ctx.compiled_fprog = compiled;
3558     priv_ctx.cur_np2fixup_info = &compiled->np2fixup_info;
3559     init_ps_input(shader, args, &priv_ctx);
3560     list_init(&priv_ctx.control_frames);
3561     priv_ctx.ps_post_process = args->super.srgb_correction;
3562 
3563     /* Avoid enabling NV_fragment_program* if we do not need it.
3564      *
3565      * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register,
3566      * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation
3567      * is faster than what we gain from using higher native instructions. There are some things though
3568      * that cannot be emulated. In that case enable the extensions.
3569      * If the extension is enabled, instruction handlers that support both ways will use it.
3570      *
3571      * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program.
3572      * So enable the best we can get.
3573      */
3574     if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0 || reg_maps->usestexldd ||
3575        reg_maps->usestexldl || reg_maps->usesfacing || reg_maps->usesifc || reg_maps->usescall)
3576     {
3577         want_nv_prog = TRUE;
3578     }
3579 
3580     shader_addline(buffer, "!!ARBfp1.0\n");
3581     if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM2])
3582     {
3583         shader_addline(buffer, "OPTION NV_fragment_program2;\n");
3584         priv_ctx.target_version = NV3;
3585     }
3586     else if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION])
3587     {
3588         shader_addline(buffer, "OPTION NV_fragment_program;\n");
3589         priv_ctx.target_version = NV2;
3590     } else {
3591         if(want_nv_prog)
3592         {
3593             /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some
3594              * limits properly
3595              */
3596             ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n");
3597             ERR("Try GLSL\n");
3598         }
3599         priv_ctx.target_version = ARB;
3600     }
3601 
3602     if (reg_maps->rt_mask > 1)
3603     {
3604         shader_addline(buffer, "OPTION ARB_draw_buffers;\n");
3605     }
3606 
3607     if (reg_maps->shader_version.major < 3)
3608     {
3609         switch (args->super.fog)
3610         {
3611             case WINED3D_FFP_PS_FOG_OFF:
3612                 break;
3613             case WINED3D_FFP_PS_FOG_LINEAR:
3614                 if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG)
3615                 {
3616                     custom_linear_fog = TRUE;
3617                     priv_ctx.ps_post_process = TRUE;
3618                     break;
3619                 }
3620                 shader_addline(buffer, "OPTION ARB_fog_linear;\n");
3621                 break;
3622             case WINED3D_FFP_PS_FOG_EXP:
3623                 shader_addline(buffer, "OPTION ARB_fog_exp;\n");
3624                 break;
3625             case WINED3D_FFP_PS_FOG_EXP2:
3626                 shader_addline(buffer, "OPTION ARB_fog_exp2;\n");
3627                 break;
3628         }
3629     }
3630 
3631     /* For now always declare the temps. At least the Nvidia assembler optimizes completely
3632      * unused temps away(but occupies them for the whole shader if they're used once). Always
3633      * declaring them avoids tricky bookkeeping work
3634      */
3635     shader_addline(buffer, "TEMP TA;\n");      /* Used for modifiers */
3636     shader_addline(buffer, "TEMP TB;\n");      /* Used for modifiers */
3637     shader_addline(buffer, "TEMP TC;\n");      /* Used for modifiers */
3638     if(dcl_td) shader_addline(buffer, "TEMP TD;\n"); /* Used for sRGB writing */
3639     shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
3640     shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n");
3641     wined3d_ftoa(eps, ftoa_tmp);
3642     shader_addline(buffer, "PARAM ps_helper_const = { 0.0, 1.0, %s, 0.0 };\n", ftoa_tmp);
3643 
3644     if (reg_maps->shader_version.major < 2)
3645     {
3646         strcpy(fragcolor, "R0");
3647     }
3648     else
3649     {
3650         if (priv_ctx.ps_post_process)
3651         {
3652             if (shader->u.ps.color0_mov)
3653             {
3654                 sprintf(fragcolor, "R%u", shader->u.ps.color0_reg);
3655             }
3656             else
3657             {
3658                 shader_addline(buffer, "TEMP TMP_COLOR;\n");
3659                 strcpy(fragcolor, "TMP_COLOR");
3660             }
3661         } else {
3662             strcpy(fragcolor, "result.color");
3663         }
3664     }
3665 
3666     if (args->super.srgb_correction)
3667     {
3668         shader_addline(buffer, "PARAM srgb_consts0 = ");
3669         shader_arb_append_imm_vec4(buffer, wined3d_srgb_const0);
3670         shader_addline(buffer, ";\n");
3671         shader_addline(buffer, "PARAM srgb_consts1 = ");
3672         shader_arb_append_imm_vec4(buffer, wined3d_srgb_const1);
3673         shader_addline(buffer, ";\n");
3674     }
3675 
3676     /* Base Declarations */
3677     shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info, NULL, &priv_ctx);
3678 
3679     for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
3680     {
3681         unsigned char bump_const;
3682 
3683         if (!(map & 1)) continue;
3684 
3685         bump_const = compiled->numbumpenvmatconsts;
3686         compiled->bumpenvmatconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
3687         compiled->bumpenvmatconst[bump_const].texunit = i;
3688         compiled->luminanceconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
3689         compiled->luminanceconst[bump_const].texunit = i;
3690 
3691         /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported
3692          * in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading
3693          * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped
3694          * textures due to conditional NP2 restrictions)
3695          *
3696          * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of
3697          * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants,
3698          * their location is shader dependent anyway and they cannot be loaded globally.
3699          */
3700         compiled->bumpenvmatconst[bump_const].const_num = next_local++;
3701         shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n",
3702                        i, compiled->bumpenvmatconst[bump_const].const_num);
3703         compiled->numbumpenvmatconsts = bump_const + 1;
3704 
3705         if (!(reg_maps->luminanceparams & (1u << i)))
3706             continue;
3707 
3708         compiled->luminanceconst[bump_const].const_num = next_local++;
3709         shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n",
3710                        i, compiled->luminanceconst[bump_const].const_num);
3711     }
3712 
3713     for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
3714     {
3715         compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED;
3716         if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2)
3717         {
3718             const DWORD *control_values = find_loop_control_values(shader, i);
3719 
3720             if(control_values)
3721             {
3722                 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i,
3723                                 control_values[0], control_values[1], control_values[2]);
3724             }
3725             else
3726             {
3727                 compiled->int_consts[i] = next_local;
3728                 compiled->num_int_consts++;
3729                 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++);
3730             }
3731         }
3732     }
3733 
3734     if(reg_maps->vpos || reg_maps->usesdsy)
3735     {
3736         compiled->ycorrection = next_local;
3737         shader_addline(buffer, "PARAM ycorrection = program.local[%u];\n", next_local++);
3738 
3739         if(reg_maps->vpos)
3740         {
3741             shader_addline(buffer, "TEMP vpos;\n");
3742             /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
3743              * ycorrection.y: -1.0(onscreen), 1.0(offscreen)
3744              * ycorrection.z: 1.0
3745              * ycorrection.w: 0.0
3746              */
3747             shader_addline(buffer, "MAD vpos, fragment.position, ycorrection.zyww, ycorrection.wxww;\n");
3748             shader_addline(buffer, "FLR vpos.xy, vpos;\n");
3749         }
3750     }
3751     else
3752     {
3753         compiled->ycorrection = WINED3D_CONST_NUM_UNUSED;
3754     }
3755 
3756     /* Load constants to fixup NP2 texcoords if there are still free constants left:
3757      * Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume
3758      * at most 8 (MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to
3759      * use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not
3760      * applied / activated. This will probably result in wrong rendering of the texture, but will save us from
3761      * shader compilation errors and the subsequent errors when drawing with this shader. */
3762     if (priv_ctx.cur_ps_args->super.np2_fixup) {
3763         unsigned char cur_fixup_sampler = 0;
3764 
3765         struct arb_ps_np2fixup_info* const fixup = priv_ctx.cur_np2fixup_info;
3766         const WORD map = priv_ctx.cur_ps_args->super.np2_fixup;
3767         const UINT max_lconsts = gl_info->limits.arb_ps_local_constants;
3768 
3769         fixup->offset = next_local;
3770         fixup->super.active = 0;
3771 
3772         for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i)
3773         {
3774             if (!(map & (1u << i)))
3775                 continue;
3776 
3777             if (fixup->offset + (cur_fixup_sampler >> 1) < max_lconsts)
3778             {
3779                 fixup->super.active |= (1u << i);
3780                 fixup->super.idx[i] = cur_fixup_sampler++;
3781             }
3782             else
3783             {
3784                 FIXME("No free constant found to load NP2 fixup data into shader. "
3785                       "Sampling from this texture will probably look wrong.\n");
3786                 break;
3787             }
3788         }
3789 
3790         fixup->super.num_consts = (cur_fixup_sampler + 1) >> 1;
3791         if (fixup->super.num_consts) {
3792             shader_addline(buffer, "PARAM np2fixup[%u] = { program.env[%u..%u] };\n",
3793                            fixup->super.num_consts, fixup->offset, fixup->super.num_consts + fixup->offset - 1);
3794         }
3795     }
3796 
3797     if (shader_priv->clipplane_emulation != ~0U && args->clip)
3798     {
3799         shader_addline(buffer, "KIL fragment.texcoord[%u];\n", shader_priv->clipplane_emulation);
3800     }
3801 
3802     /* Base Shader Body */
3803     if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL)))
3804         return 0;
3805 
3806     if(args->super.srgb_correction) {
3807         arbfp_add_sRGB_correction(buffer, fragcolor, srgbtmp[0], srgbtmp[1], srgbtmp[2], srgbtmp[3],
3808                                   priv_ctx.target_version >= NV2);
3809     }
3810 
3811     if (custom_linear_fog)
3812         arbfp_add_linear_fog(buffer, fragcolor, "TA");
3813 
3814     if(strcmp(fragcolor, "result.color")) {
3815         shader_addline(buffer, "MOV result.color, %s;\n", fragcolor);
3816     }
3817     shader_addline(buffer, "END\n");
3818 
3819     /* TODO: change to resource.glObjectHandle or something like that */
3820     GL_EXTCALL(glGenProgramsARB(1, &retval));
3821 
3822     TRACE("Creating a hw pixel shader, prg=%d\n", retval);
3823     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval));
3824 
3825     TRACE("Created hw pixel shader, prg=%d\n", retval);
3826     if (!shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer->buffer))
3827         return 0;
3828 
3829     return retval;
3830 }
3831 
3832 static int compare_sig(const struct wined3d_shader_signature *sig1, const struct wined3d_shader_signature *sig2)
3833 {
3834     unsigned int i;
3835     int ret;
3836 
3837     if (sig1->element_count != sig2->element_count)
3838         return sig1->element_count < sig2->element_count ? -1 : 1;
3839 
3840     for (i = 0; i < sig1->element_count; ++i)
3841     {
3842         const struct wined3d_shader_signature_element *e1, *e2;
3843 
3844         e1 = &sig1->elements[i];
3845         e2 = &sig2->elements[i];
3846 
3847         if (!e1->semantic_name || !e2->semantic_name)
3848         {
3849             /* Compare pointers, not contents. One string is NULL (element
3850              * does not exist), the other one is not NULL. */
3851             if (e1->semantic_name != e2->semantic_name)
3852                 return e1->semantic_name < e2->semantic_name ? -1 : 1;
3853             continue;
3854         }
3855 
3856         if ((ret = strcmp(e1->semantic_name, e2->semantic_name)))
3857             return ret;
3858         if (e1->semantic_idx != e2->semantic_idx)
3859             return e1->semantic_idx < e2->semantic_idx ? -1 : 1;
3860         if (e1->sysval_semantic != e2->sysval_semantic)
3861             return e1->sysval_semantic < e2->sysval_semantic ? -1 : 1;
3862         if (e1->component_type != e2->component_type)
3863             return e1->component_type < e2->component_type ? -1 : 1;
3864         if (e1->register_idx != e2->register_idx)
3865             return e1->register_idx < e2->register_idx ? -1 : 1;
3866         if (e1->mask != e2->mask)
3867             return e1->mask < e2->mask ? -1 : 1;
3868     }
3869     return 0;
3870 }
3871 
3872 static void clone_sig(struct wined3d_shader_signature *new, const struct wined3d_shader_signature *sig)
3873 {
3874     unsigned int i;
3875     char *name;
3876 
3877     new->element_count = sig->element_count;
3878     new->elements = wined3d_calloc(new->element_count, sizeof(*new->elements));
3879     for (i = 0; i < sig->element_count; ++i)
3880     {
3881         new->elements[i] = sig->elements[i];
3882 
3883         if (!new->elements[i].semantic_name)
3884             continue;
3885 
3886         /* Clone the semantic string */
3887         name = HeapAlloc(GetProcessHeap(), 0, strlen(sig->elements[i].semantic_name) + 1);
3888         strcpy(name, sig->elements[i].semantic_name);
3889         new->elements[i].semantic_name = name;
3890     }
3891 }
3892 
3893 static DWORD find_input_signature(struct shader_arb_priv *priv, const struct wined3d_shader_signature *sig)
3894 {
3895     struct wine_rb_entry *entry = wine_rb_get(&priv->signature_tree, sig);
3896     struct ps_signature *found_sig;
3897 
3898     if (entry)
3899     {
3900         found_sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
3901         TRACE("Found existing signature %u\n", found_sig->idx);
3902         return found_sig->idx;
3903     }
3904     found_sig = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*found_sig));
3905     clone_sig(&found_sig->sig, sig);
3906     found_sig->idx = priv->ps_sig_number++;
3907     TRACE("New signature stored and assigned number %u\n", found_sig->idx);
3908     if(wine_rb_put(&priv->signature_tree, sig, &found_sig->entry) == -1)
3909     {
3910         ERR("Failed to insert program entry.\n");
3911     }
3912     return found_sig->idx;
3913 }
3914 
3915 static void init_output_registers(const struct wined3d_shader *shader,
3916         const struct wined3d_shader_signature *ps_input_sig,
3917         struct shader_arb_ctx_priv *priv_ctx, struct arb_vs_compiled_shader *compiled)
3918 {
3919     unsigned int i, j;
3920     static const char * const texcoords[8] =
3921     {
3922         "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
3923         "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]"
3924     };
3925     /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary
3926      * and varying 9 to result.color.secondary
3927      */
3928     static const char * const decl_idx_to_string[MAX_REG_INPUT] =
3929     {
3930         "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
3931         "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]",
3932         "result.color.primary", "result.color.secondary"
3933     };
3934 
3935     if (!ps_input_sig)
3936     {
3937         TRACE("Pixel shader uses builtin varyings\n");
3938         /* Map builtins to builtins */
3939         for(i = 0; i < 8; i++)
3940         {
3941             priv_ctx->texcrd_output[i] = texcoords[i];
3942         }
3943         priv_ctx->color_output[0] = "result.color.primary";
3944         priv_ctx->color_output[1] = "result.color.secondary";
3945         priv_ctx->fog_output = "TMP_FOGCOORD";
3946 
3947         /* Map declared regs to builtins. Use "TA" to /dev/null unread output */
3948         for (i = 0; i < shader->output_signature.element_count; ++i)
3949         {
3950             const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i];
3951 
3952             if (!output->semantic_name)
3953                 continue;
3954 
3955             if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION))
3956             {
3957                 TRACE("o%u is TMP_OUT\n", output->register_idx);
3958                 if (!output->semantic_idx)
3959                     priv_ctx->vs_output[output->register_idx] = "TMP_OUT";
3960                 else
3961                     priv_ctx->vs_output[output->register_idx] = "TA";
3962             }
3963             else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE))
3964             {
3965                 TRACE("o%u is result.pointsize\n", output->register_idx);
3966                 if (!output->semantic_idx)
3967                     priv_ctx->vs_output[output->register_idx] = "result.pointsize";
3968                 else
3969                     priv_ctx->vs_output[output->register_idx] = "TA";
3970             }
3971             else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_COLOR))
3972             {
3973                 TRACE("o%u is result.color.?, idx %u\n", output->register_idx, output->semantic_idx);
3974                 if (!output->semantic_idx)
3975                     priv_ctx->vs_output[output->register_idx] = "result.color.primary";
3976                 else if (output->semantic_idx == 1)
3977                     priv_ctx->vs_output[output->register_idx] = "result.color.secondary";
3978                 else priv_ctx->vs_output[output->register_idx] = "TA";
3979             }
3980             else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
3981             {
3982                 TRACE("o%u is result.texcoord[%u]\n", output->register_idx, output->semantic_idx);
3983                 if (output->semantic_idx >= 8)
3984                     priv_ctx->vs_output[output->register_idx] = "TA";
3985                 else
3986                     priv_ctx->vs_output[output->register_idx] = texcoords[output->semantic_idx];
3987             }
3988             else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_FOG))
3989             {
3990                 TRACE("o%u is result.fogcoord\n", output->register_idx);
3991                 if (output->semantic_idx > 0)
3992                     priv_ctx->vs_output[output->register_idx] = "TA";
3993                 else
3994                     priv_ctx->vs_output[output->register_idx] = "result.fogcoord";
3995             }
3996             else
3997             {
3998                 priv_ctx->vs_output[output->register_idx] = "TA";
3999             }
4000         }
4001         return;
4002     }
4003 
4004     TRACE("Pixel shader uses declared varyings\n");
4005 
4006     /* Map builtin to declared. /dev/null the results by default to the TA temp reg */
4007     for(i = 0; i < 8; i++)
4008     {
4009         priv_ctx->texcrd_output[i] = "TA";
4010     }
4011     priv_ctx->color_output[0] = "TA";
4012     priv_ctx->color_output[1] = "TA";
4013     priv_ctx->fog_output = "TA";
4014 
4015     for (i = 0; i < ps_input_sig->element_count; ++i)
4016     {
4017         const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[i];
4018 
4019         if (!input->semantic_name)
4020             continue;
4021 
4022         /* If a declared input register is not written by builtin arguments, don't write to it.
4023          * GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D
4024          *
4025          * Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes
4026          * to TMP_OUT in any case
4027          */
4028         if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
4029         {
4030             if (input->semantic_idx < 8)
4031                 priv_ctx->texcrd_output[input->semantic_idx] = decl_idx_to_string[input->register_idx];
4032         }
4033         else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_COLOR))
4034         {
4035             if (input->semantic_idx < 2)
4036                 priv_ctx->color_output[input->semantic_idx] = decl_idx_to_string[input->register_idx];
4037         }
4038         else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_FOG))
4039         {
4040             if (!input->semantic_idx)
4041                 priv_ctx->fog_output = decl_idx_to_string[input->register_idx];
4042         }
4043         else
4044         {
4045             continue;
4046         }
4047 
4048         if (!strcmp(decl_idx_to_string[input->register_idx], "result.color.primary")
4049                 || !strcmp(decl_idx_to_string[input->register_idx], "result.color.secondary"))
4050         {
4051             compiled->need_color_unclamp = TRUE;
4052         }
4053     }
4054 
4055     /* Map declared to declared */
4056     for (i = 0; i < shader->output_signature.element_count; ++i)
4057     {
4058         const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i];
4059 
4060         /* Write unread output to TA to throw them away */
4061         priv_ctx->vs_output[output->register_idx] = "TA";
4062 
4063         if (!output->semantic_name)
4064             continue;
4065 
4066         if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION) && !output->semantic_idx)
4067         {
4068             priv_ctx->vs_output[output->register_idx] = "TMP_OUT";
4069             continue;
4070         }
4071         else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE) && !output->semantic_idx)
4072         {
4073             priv_ctx->vs_output[output->register_idx] = "result.pointsize";
4074             continue;
4075         }
4076 
4077         for (j = 0; j < ps_input_sig->element_count; ++j)
4078         {
4079             const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[j];
4080 
4081             if (!input->semantic_name)
4082                 continue;
4083 
4084             if (!strcmp(input->semantic_name, output->semantic_name)
4085                     && input->semantic_idx == output->semantic_idx)
4086             {
4087                 priv_ctx->vs_output[output->register_idx] = decl_idx_to_string[input->register_idx];
4088 
4089                 if (!strcmp(priv_ctx->vs_output[output->register_idx], "result.color.primary")
4090                         || !strcmp(priv_ctx->vs_output[output->register_idx], "result.color.secondary"))
4091                 {
4092                     compiled->need_color_unclamp = TRUE;
4093                 }
4094             }
4095         }
4096     }
4097 }
4098 
4099 /* Context activation is done by the caller. */
4100 static GLuint shader_arb_generate_vshader(const struct wined3d_shader *shader,
4101         const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer,
4102         const struct arb_vs_compile_args *args, struct arb_vs_compiled_shader *compiled,
4103         const struct wined3d_shader_signature *ps_input_sig)
4104 {
4105     const struct arb_vshader_private *shader_data = shader->backend_data;
4106     const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
4107     struct shader_arb_priv *priv = shader->device->shader_priv;
4108     GLuint ret;
4109     DWORD next_local = 0;
4110     struct shader_arb_ctx_priv priv_ctx;
4111     unsigned int i;
4112 
4113     memset(&priv_ctx, 0, sizeof(priv_ctx));
4114     priv_ctx.cur_vs_args = args;
4115     list_init(&priv_ctx.control_frames);
4116     init_output_registers(shader, ps_input_sig, &priv_ctx, compiled);
4117 
4118     /*  Create the hw ARB shader */
4119     shader_addline(buffer, "!!ARBvp1.0\n");
4120 
4121     /* Always enable the NV extension if available. Unlike fragment shaders, there is no
4122      * mesurable performance penalty, and we can always make use of it for clipplanes.
4123      */
4124     if (gl_info->supported[NV_VERTEX_PROGRAM3])
4125     {
4126         shader_addline(buffer, "OPTION NV_vertex_program3;\n");
4127         priv_ctx.target_version = NV3;
4128         shader_addline(buffer, "ADDRESS aL;\n");
4129     }
4130     else if (gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])
4131     {
4132         shader_addline(buffer, "OPTION NV_vertex_program2;\n");
4133         priv_ctx.target_version = NV2;
4134         shader_addline(buffer, "ADDRESS aL;\n");
4135     } else {
4136         priv_ctx.target_version = ARB;
4137     }
4138 
4139     shader_addline(buffer, "TEMP TMP_OUT;\n");
4140     if (reg_maps->fog)
4141         shader_addline(buffer, "TEMP TMP_FOGCOORD;\n");
4142     if (need_helper_const(shader_data, reg_maps, gl_info))
4143     {
4144         char ftoa_tmp[17];
4145         wined3d_ftoa(eps, ftoa_tmp);
4146         shader_addline(buffer, "PARAM helper_const = { 0.0, 1.0, 2.0, %s};\n", ftoa_tmp);
4147     }
4148     if (need_rel_addr_const(shader_data, reg_maps, gl_info))
4149     {
4150         shader_addline(buffer, "PARAM rel_addr_const = { 0.5, %d.0, 0.0, 0.0 };\n", shader_data->rel_offset);
4151         shader_addline(buffer, "TEMP A0_SHADOW;\n");
4152     }
4153 
4154     shader_addline(buffer, "TEMP TA;\n");
4155     shader_addline(buffer, "TEMP TB;\n");
4156 
4157     /* Base Declarations */
4158     shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info,
4159             &priv_ctx.vs_clipplanes, &priv_ctx);
4160 
4161     for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
4162     {
4163         compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED;
4164         if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2)
4165         {
4166             const DWORD *control_values = find_loop_control_values(shader, i);
4167 
4168             if(control_values)
4169             {
4170                 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i,
4171                                 control_values[0], control_values[1], control_values[2]);
4172             }
4173             else
4174             {
4175                 compiled->int_consts[i] = next_local;
4176                 compiled->num_int_consts++;
4177                 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++);
4178             }
4179         }
4180     }
4181 
4182     /* We need a constant to fixup the final position */
4183     shader_addline(buffer, "PARAM posFixup = program.local[%u];\n", next_local);
4184     compiled->pos_fixup = next_local++;
4185 
4186     /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values
4187      * for output parameters. D3D in theory does not do that either, but some applications depend on a
4188      * proper initialization of the secondary color, and programs using the fixed function pipeline without
4189      * a replacement shader depend on the texcoord.w being set properly.
4190      *
4191      * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This
4192      * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So
4193      * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex-
4194      * coords, we have a flag in the opengl caps. Many cards do not require the texcoord being set, and
4195      * this can eat a number of instructions, so skip it unless this cap is set as well
4196      */
4197     if (!gl_info->supported[NV_VERTEX_PROGRAM])
4198     {
4199         const char *color_init = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_0001);
4200         shader_addline(buffer, "MOV result.color.secondary, %s;\n", color_init);
4201 
4202         if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W && !priv->ffp_proj_control)
4203         {
4204             int i;
4205             const char *one = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ONE);
4206             for(i = 0; i < MAX_REG_TEXCRD; i++)
4207             {
4208                 if (reg_maps->u.texcoord_mask[i] && reg_maps->u.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL)
4209                     shader_addline(buffer, "MOV result.texcoord[%u].w, %s\n", i, one);
4210             }
4211         }
4212     }
4213 
4214     /* The shader starts with the main function */
4215     priv_ctx.in_main_func = TRUE;
4216     /* Base Shader Body */
4217     if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL)))
4218         return -1;
4219 
4220     if (!priv_ctx.footer_written) vshader_add_footer(&priv_ctx,
4221             shader_data, args, reg_maps, gl_info, buffer);
4222 
4223     shader_addline(buffer, "END\n");
4224 
4225     /* TODO: change to resource.glObjectHandle or something like that */
4226     GL_EXTCALL(glGenProgramsARB(1, &ret));
4227 
4228     TRACE("Creating a hw vertex shader, prg=%d\n", ret);
4229     GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret));
4230 
4231     TRACE("Created hw vertex shader, prg=%d\n", ret);
4232     if (!shader_arb_compile(gl_info, GL_VERTEX_PROGRAM_ARB, buffer->buffer))
4233         return -1;
4234 
4235     return ret;
4236 }
4237 
4238 /* Context activation is done by the caller. */
4239 static struct arb_ps_compiled_shader *find_arb_pshader(struct wined3d_shader *shader,
4240         const struct arb_ps_compile_args *args)
4241 {
4242     struct wined3d_device *device = shader->device;
4243     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4244     const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info;
4245     UINT i;
4246     DWORD new_size;
4247     struct arb_ps_compiled_shader *new_array;
4248     struct wined3d_string_buffer buffer;
4249     struct arb_pshader_private *shader_data;
4250     GLuint ret;
4251 
4252     if (!shader->backend_data)
4253     {
4254         struct shader_arb_priv *priv = device->shader_priv;
4255 
4256         shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4257         shader_data = shader->backend_data;
4258         shader_data->clamp_consts = shader->reg_maps.shader_version.major == 1;
4259 
4260         if (shader->reg_maps.shader_version.major < 3)
4261             shader_data->input_signature_idx = ~0U;
4262         else
4263             shader_data->input_signature_idx = find_input_signature(priv, &shader->input_signature);
4264 
4265         TRACE("Shader got assigned input signature index %u\n", shader_data->input_signature_idx);
4266 
4267         if (!d3d_info->vs_clipping)
4268             shader_data->clipplane_emulation = shader_find_free_input_register(&shader->reg_maps,
4269                     d3d_info->limits.ffp_blend_stages - 1);
4270         else
4271             shader_data->clipplane_emulation = ~0U;
4272     }
4273     shader_data = shader->backend_data;
4274 
4275     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4276      * so a linear search is more performant than a hashmap or a binary search
4277      * (cache coherency etc)
4278      */
4279     for (i = 0; i < shader_data->num_gl_shaders; ++i)
4280     {
4281         if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)))
4282             return &shader_data->gl_shaders[i];
4283     }
4284 
4285     TRACE("No matching GL shader found, compiling a new shader\n");
4286     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4287         if (shader_data->num_gl_shaders)
4288         {
4289             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4290             new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders,
4291                                     new_size * sizeof(*shader_data->gl_shaders));
4292         } else {
4293             new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
4294             new_size = 1;
4295         }
4296 
4297         if(!new_array) {
4298             ERR("Out of memory\n");
4299             return 0;
4300         }
4301         shader_data->gl_shaders = new_array;
4302         shader_data->shader_array_size = new_size;
4303     }
4304 
4305     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4306 
4307     pixelshader_update_resource_types(shader, args->super.tex_types);
4308 
4309     if (!string_buffer_init(&buffer))
4310     {
4311         ERR("Failed to initialize shader buffer.\n");
4312         return 0;
4313     }
4314 
4315     ret = shader_arb_generate_pshader(shader, gl_info, &buffer, args,
4316             &shader_data->gl_shaders[shader_data->num_gl_shaders]);
4317     string_buffer_free(&buffer);
4318     shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret;
4319 
4320     return &shader_data->gl_shaders[shader_data->num_gl_shaders++];
4321 }
4322 
4323 static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new,
4324                                  const DWORD use_map, BOOL skip_int) {
4325     if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE;
4326     if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE;
4327     if(stored->super.fog_src != new->super.fog_src) return FALSE;
4328     if(stored->clip.boolclip_compare != new->clip.boolclip_compare) return FALSE;
4329     if(stored->ps_signature != new->ps_signature) return FALSE;
4330     if(stored->vertex.samplers_compare != new->vertex.samplers_compare) return FALSE;
4331     if(skip_int) return TRUE;
4332 
4333     return !memcmp(stored->loop_ctrl, new->loop_ctrl, sizeof(stored->loop_ctrl));
4334 }
4335 
4336 static struct arb_vs_compiled_shader *find_arb_vshader(struct wined3d_shader *shader,
4337         const struct wined3d_gl_info *gl_info, DWORD use_map, const struct arb_vs_compile_args *args,
4338         const struct wined3d_shader_signature *ps_input_sig)
4339 {
4340     UINT i;
4341     DWORD new_size;
4342     struct arb_vs_compiled_shader *new_array;
4343     struct wined3d_string_buffer buffer;
4344     struct arb_vshader_private *shader_data;
4345     GLuint ret;
4346 
4347     if (!shader->backend_data)
4348     {
4349         const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
4350 
4351         shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4352         shader_data = shader->backend_data;
4353 
4354         if ((gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT)
4355                 && reg_maps->min_rel_offset <= reg_maps->max_rel_offset)
4356         {
4357             if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 127)
4358             {
4359                 FIXME("The difference between the minimum and maximum relative offset is > 127.\n");
4360                 FIXME("Which this OpenGL implementation does not support. Try using GLSL.\n");
4361                 FIXME("Min: %u, Max: %u.\n", reg_maps->min_rel_offset, reg_maps->max_rel_offset);
4362             }
4363             else if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 63)
4364                 shader_data->rel_offset = reg_maps->min_rel_offset + 63;
4365             else if (reg_maps->max_rel_offset > 63)
4366                 shader_data->rel_offset = reg_maps->min_rel_offset;
4367         }
4368     }
4369     shader_data = shader->backend_data;
4370 
4371     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4372      * so a linear search is more performant than a hashmap or a binary search
4373      * (cache coherency etc)
4374      */
4375     for(i = 0; i < shader_data->num_gl_shaders; i++) {
4376         if (vs_args_equal(&shader_data->gl_shaders[i].args, args,
4377                 use_map, gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]))
4378         {
4379             return &shader_data->gl_shaders[i];
4380         }
4381     }
4382 
4383     TRACE("No matching GL shader found, compiling a new shader\n");
4384 
4385     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4386         if (shader_data->num_gl_shaders)
4387         {
4388             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4389             new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders,
4390                                     new_size * sizeof(*shader_data->gl_shaders));
4391         } else {
4392             new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
4393             new_size = 1;
4394         }
4395 
4396         if(!new_array) {
4397             ERR("Out of memory\n");
4398             return 0;
4399         }
4400         shader_data->gl_shaders = new_array;
4401         shader_data->shader_array_size = new_size;
4402     }
4403 
4404     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4405 
4406     if (!string_buffer_init(&buffer))
4407     {
4408         ERR("Failed to initialize shader buffer.\n");
4409         return 0;
4410     }
4411 
4412     ret = shader_arb_generate_vshader(shader, gl_info, &buffer, args,
4413             &shader_data->gl_shaders[shader_data->num_gl_shaders],
4414             ps_input_sig);
4415     string_buffer_free(&buffer);
4416     shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret;
4417 
4418     return &shader_data->gl_shaders[shader_data->num_gl_shaders++];
4419 }
4420 
4421 static void find_arb_ps_compile_args(const struct wined3d_state *state,
4422         const struct wined3d_context *context, const struct wined3d_shader *shader,
4423         struct arb_ps_compile_args *args)
4424 {
4425     const struct wined3d_gl_info *gl_info = context->gl_info;
4426     const struct wined3d_d3d_info *d3d_info = context->d3d_info;
4427     int i;
4428     WORD int_skip;
4429 
4430     find_ps_compile_args(state, shader, context->stream_info.position_transformed, &args->super, context);
4431 
4432     /* This forces all local boolean constants to 1 to make them stateblock independent */
4433     args->bools = shader->reg_maps.local_bool_consts;
4434 
4435     for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
4436     {
4437         if (state->ps_consts_b[i])
4438             args->bools |= ( 1u << i);
4439     }
4440 
4441     /* Only enable the clip plane emulation KIL if at least one clipplane is enabled. The KIL instruction
4442      * is quite expensive because it forces the driver to disable early Z discards. It is cheaper to
4443      * duplicate the shader than have a no-op KIL instruction in every shader
4444      */
4445     if (!d3d_info->vs_clipping && use_vs(state)
4446             && state->render_states[WINED3D_RS_CLIPPING]
4447             && state->render_states[WINED3D_RS_CLIPPLANEENABLE])
4448         args->clip = 1;
4449     else
4450         args->clip = 0;
4451 
4452     /* Skip if unused or local, or supported natively */
4453     int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts;
4454     if (int_skip == 0xffff || gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION])
4455     {
4456         memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl));
4457         return;
4458     }
4459 
4460     for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
4461     {
4462         if (int_skip & (1u << i))
4463         {
4464             args->loop_ctrl[i][0] = 0;
4465             args->loop_ctrl[i][1] = 0;
4466             args->loop_ctrl[i][2] = 0;
4467         }
4468         else
4469         {
4470             args->loop_ctrl[i][0] = state->ps_consts_i[i].x;
4471             args->loop_ctrl[i][1] = state->ps_consts_i[i].y;
4472             args->loop_ctrl[i][2] = state->ps_consts_i[i].z;
4473         }
4474     }
4475 }
4476 
4477 static void find_arb_vs_compile_args(const struct wined3d_state *state,
4478         const struct wined3d_context *context, const struct wined3d_shader *shader,
4479         struct arb_vs_compile_args *args)
4480 {
4481     const struct wined3d_device *device = shader->device;
4482     const struct wined3d_adapter *adapter = device->adapter;
4483     const struct wined3d_gl_info *gl_info = context->gl_info;
4484     const struct wined3d_d3d_info *d3d_info = context->d3d_info;
4485     int i;
4486     WORD int_skip;
4487 
4488     find_vs_compile_args(state, shader, context->stream_info.swizzle_map, &args->super, context);
4489 
4490     args->clip.boolclip_compare = 0;
4491     if (use_ps(state))
4492     {
4493         const struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL];
4494         const struct arb_pshader_private *shader_priv = ps->backend_data;
4495         args->ps_signature = shader_priv->input_signature_idx;
4496 
4497         args->clip.boolclip.clip_texcoord = shader_priv->clipplane_emulation + 1;
4498     }
4499     else
4500     {
4501         args->ps_signature = ~0;
4502         if (!d3d_info->vs_clipping && adapter->fragment_pipe == &arbfp_fragment_pipeline)
4503             args->clip.boolclip.clip_texcoord = ffp_clip_emul(context) ? d3d_info->limits.ffp_blend_stages : 0;
4504         /* Otherwise: Setting boolclip_compare set clip_texcoord to 0 */
4505     }
4506 
4507     if (args->clip.boolclip.clip_texcoord)
4508     {
4509         if (state->render_states[WINED3D_RS_CLIPPING])
4510             args->clip.boolclip.clipplane_mask = (unsigned char)state->render_states[WINED3D_RS_CLIPPLANEENABLE];
4511         /* clipplane_mask was set to 0 by setting boolclip_compare to 0 */
4512     }
4513 
4514     /* This forces all local boolean constants to 1 to make them stateblock independent */
4515     args->clip.boolclip.bools = shader->reg_maps.local_bool_consts;
4516     /* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */
4517     for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
4518     {
4519         if (state->vs_consts_b[i])
4520             args->clip.boolclip.bools |= (1u << i);
4521     }
4522 
4523     args->vertex.samplers[0] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 0];
4524     args->vertex.samplers[1] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 1];
4525     args->vertex.samplers[2] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 2];
4526     args->vertex.samplers[3] = 0;
4527 
4528     /* Skip if unused or local */
4529     int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts;
4530     /* This is about flow control, not clipping. */
4531     if (int_skip == 0xffff || gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])
4532     {
4533         memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl));
4534         return;
4535     }
4536 
4537     for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
4538     {
4539         if (int_skip & (1u << i))
4540         {
4541             args->loop_ctrl[i][0] = 0;
4542             args->loop_ctrl[i][1] = 0;
4543             args->loop_ctrl[i][2] = 0;
4544         }
4545         else
4546         {
4547             args->loop_ctrl[i][0] = state->vs_consts_i[i].x;
4548             args->loop_ctrl[i][1] = state->vs_consts_i[i].y;
4549             args->loop_ctrl[i][2] = state->vs_consts_i[i].z;
4550         }
4551     }
4552 }
4553 
4554 /* Context activation is done by the caller. */
4555 static void shader_arb_select(void *shader_priv, struct wined3d_context *context,
4556         const struct wined3d_state *state)
4557 {
4558     struct shader_arb_priv *priv = shader_priv;
4559     const struct wined3d_gl_info *gl_info = context->gl_info;
4560     int i;
4561 
4562     /* Deal with pixel shaders first so the vertex shader arg function has the input signature ready */
4563     if (use_ps(state))
4564     {
4565         struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL];
4566         struct arb_ps_compile_args compile_args;
4567         struct arb_ps_compiled_shader *compiled;
4568 
4569         TRACE("Using pixel shader %p.\n", ps);
4570         find_arb_ps_compile_args(state, context, ps, &compile_args);
4571         compiled = find_arb_pshader(ps, &compile_args);
4572         priv->current_fprogram_id = compiled->prgId;
4573         priv->compiled_fprog = compiled;
4574 
4575         /* Bind the fragment program */
4576         GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id));
4577         checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);");
4578 
4579         if (!priv->use_arbfp_fixed_func)
4580             priv->fragment_pipe->enable_extension(gl_info, FALSE);
4581 
4582         /* Enable OpenGL fragment programs. */
4583         gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB);
4584         checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);");
4585 
4586         TRACE("Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", priv->current_fprogram_id);
4587 
4588         /* Pixel Shader 1.x constants are clamped to [-1;1], Pixel Shader 2.0 constants are not. If switching between
4589          * a 1.x and newer shader, reload the first 8 constants
4590          */
4591         if (priv->last_ps_const_clamped != ((struct arb_pshader_private *)ps->backend_data)->clamp_consts)
4592         {
4593             priv->last_ps_const_clamped = ((struct arb_pshader_private *)ps->backend_data)->clamp_consts;
4594             priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, 8);
4595             for(i = 0; i < 8; i++)
4596             {
4597                 priv->pshader_const_dirty[i] = 1;
4598             }
4599             /* Also takes care of loading local constants */
4600             shader_arb_load_constants_internal(shader_priv, context, state, TRUE, FALSE, TRUE);
4601         }
4602         else
4603         {
4604             UINT rt_height = state->fb->render_targets[0]->height;
4605             shader_arb_ps_local_constants(compiled, context, state, rt_height);
4606         }
4607 
4608         /* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */
4609         if (compiled->np2fixup_info.super.active)
4610             context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP;
4611 
4612         if (ps->load_local_constsF)
4613             context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F;
4614     }
4615     else
4616     {
4617         if (gl_info->supported[ARB_FRAGMENT_PROGRAM] && !priv->use_arbfp_fixed_func)
4618         {
4619             /* Disable only if we're not using arbfp fixed function fragment
4620              * processing. If this is used, keep GL_FRAGMENT_PROGRAM_ARB
4621              * enabled, and the fixed function pipeline will bind the fixed
4622              * function replacement shader. */
4623             gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB);
4624             checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
4625             priv->current_fprogram_id = 0;
4626         }
4627         priv->fragment_pipe->enable_extension(gl_info, TRUE);
4628     }
4629 
4630     if (use_vs(state))
4631     {
4632         struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX];
4633         struct arb_vs_compile_args compile_args;
4634         struct arb_vs_compiled_shader *compiled;
4635         const struct wined3d_shader_signature *ps_input_sig;
4636 
4637         TRACE("Using vertex shader %p\n", vs);
4638         find_arb_vs_compile_args(state, context, vs, &compile_args);
4639 
4640         /* Instead of searching for the signature in the signature list, read the one from the
4641          * current pixel shader. It's maybe not the shader where the signature came from, but it
4642          * is the same signature and faster to find. */
4643         if (compile_args.ps_signature == ~0U)
4644             ps_input_sig = NULL;
4645         else
4646             ps_input_sig = &state->shader[WINED3D_SHADER_TYPE_PIXEL]->input_signature;
4647 
4648         compiled = find_arb_vshader(vs, context->gl_info, context->stream_info.use_map,
4649                 &compile_args, ps_input_sig);
4650         priv->current_vprogram_id = compiled->prgId;
4651         priv->compiled_vprog = compiled;
4652 
4653         /* Bind the vertex program */
4654         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id));
4655         checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);");
4656 
4657         priv->vertex_pipe->vp_enable(gl_info, FALSE);
4658 
4659         /* Enable OpenGL vertex programs */
4660         gl_info->gl_ops.gl.p_glEnable(GL_VERTEX_PROGRAM_ARB);
4661         checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);");
4662         TRACE("Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", priv->current_vprogram_id);
4663         shader_arb_vs_local_constants(compiled, context, state);
4664 
4665         if(priv->last_vs_color_unclamp != compiled->need_color_unclamp) {
4666             priv->last_vs_color_unclamp = compiled->need_color_unclamp;
4667 
4668             if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT])
4669             {
4670                 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, !compiled->need_color_unclamp));
4671                 checkGLcall("glClampColorARB");
4672             } else {
4673                 FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
4674             }
4675         }
4676 
4677         if (vs->load_local_constsF)
4678             context->constant_update_mask |= WINED3D_SHADER_CONST_VS_F;
4679     }
4680     else
4681     {
4682         if (gl_info->supported[ARB_VERTEX_PROGRAM])
4683         {
4684             priv->current_vprogram_id = 0;
4685             gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB);
4686             checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
4687         }
4688         priv->vertex_pipe->vp_enable(gl_info, TRUE);
4689     }
4690 }
4691 
/* Compute shader selection entry point of the ARB backend. The ARB backend
 * has no compute support, so this only logs an error. */
static void shader_arb_select_compute(void *shader_priv, struct wined3d_context *context,
        const struct wined3d_state *state)
{
    /* None of the parameters are used. */
    ERR("Compute pipeline not supported by the ARB shader backend.\n");
}
4697 
4698 /* Context activation is done by the caller. */
4699 static void shader_arb_disable(void *shader_priv, struct wined3d_context *context)
4700 {
4701     const struct wined3d_gl_info *gl_info = context->gl_info;
4702     struct shader_arb_priv *priv = shader_priv;
4703 
4704     if (gl_info->supported[ARB_FRAGMENT_PROGRAM])
4705     {
4706         gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB);
4707         checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
4708         priv->current_fprogram_id = 0;
4709     }
4710     priv->fragment_pipe->enable_extension(gl_info, FALSE);
4711 
4712     if (gl_info->supported[ARB_VERTEX_PROGRAM])
4713     {
4714         priv->current_vprogram_id = 0;
4715         gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB);
4716         checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
4717     }
4718     priv->vertex_pipe->vp_enable(gl_info, FALSE);
4719 
4720     if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT] && priv->last_vs_color_unclamp)
4721     {
4722         GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, GL_FIXED_ONLY_ARB));
4723         checkGLcall("glClampColorARB");
4724         priv->last_vs_color_unclamp = FALSE;
4725     }
4726 
4727     context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL)
4728             | (1u << WINED3D_SHADER_TYPE_VERTEX)
4729             | (1u << WINED3D_SHADER_TYPE_GEOMETRY)
4730             | (1u << WINED3D_SHADER_TYPE_HULL)
4731             | (1u << WINED3D_SHADER_TYPE_DOMAIN)
4732             | (1u << WINED3D_SHADER_TYPE_COMPUTE);
4733 }
4734 
4735 static void shader_arb_destroy(struct wined3d_shader *shader)
4736 {
4737     struct wined3d_device *device = shader->device;
4738     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4739 
4740     if (shader_is_pshader_version(shader->reg_maps.shader_version.type))
4741     {
4742         struct arb_pshader_private *shader_data = shader->backend_data;
4743         UINT i;
4744 
4745         if(!shader_data) return; /* This can happen if a shader was never compiled */
4746 
4747         if (shader_data->num_gl_shaders)
4748         {
4749             struct wined3d_context *context = context_acquire(device, NULL, 0);
4750 
4751             for (i = 0; i < shader_data->num_gl_shaders; ++i)
4752             {
4753                 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId));
4754                 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))");
4755             }
4756 
4757             context_release(context);
4758         }
4759 
4760         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
4761         HeapFree(GetProcessHeap(), 0, shader_data);
4762         shader->backend_data = NULL;
4763     }
4764     else
4765     {
4766         struct arb_vshader_private *shader_data = shader->backend_data;
4767         UINT i;
4768 
4769         if(!shader_data) return; /* This can happen if a shader was never compiled */
4770 
4771         if (shader_data->num_gl_shaders)
4772         {
4773             struct wined3d_context *context = context_acquire(device, NULL, 0);
4774 
4775             for (i = 0; i < shader_data->num_gl_shaders; ++i)
4776             {
4777                 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId));
4778                 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))");
4779             }
4780 
4781             context_release(context);
4782         }
4783 
4784         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
4785         HeapFree(GetProcessHeap(), 0, shader_data);
4786         shader->backend_data = NULL;
4787     }
4788 }
4789 
4790 static int sig_tree_compare(const void *key, const struct wine_rb_entry *entry)
4791 {
4792     struct ps_signature *e = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
4793     return compare_sig(key, &e->sig);
4794 }
4795 
4796 static HRESULT shader_arb_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe,
4797         const struct fragment_pipeline *fragment_pipe)
4798 {
4799     struct shader_arb_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*priv));
4800     struct fragment_caps fragment_caps;
4801     void *vertex_priv, *fragment_priv;
4802     const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info;
4803 
4804     if (!(vertex_priv = vertex_pipe->vp_alloc(&arb_program_shader_backend, priv)))
4805     {
4806         ERR("Failed to initialize vertex pipe.\n");
4807         HeapFree(GetProcessHeap(), 0, priv);
4808         return E_FAIL;
4809     }
4810 
4811     if (!(fragment_priv = fragment_pipe->alloc_private(&arb_program_shader_backend, priv)))
4812     {
4813         ERR("Failed to initialize fragment pipe.\n");
4814         vertex_pipe->vp_free(device);
4815         HeapFree(GetProcessHeap(), 0, priv);
4816         return E_FAIL;
4817     }
4818 
4819     memset(priv->vshader_const_dirty, 1,
4820            sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count);
4821     memset(priv->pshader_const_dirty, 1,
4822             sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count);
4823 
4824     wine_rb_init(&priv->signature_tree, sig_tree_compare);
4825 
4826     priv->vertex_pipe = vertex_pipe;
4827     priv->fragment_pipe = fragment_pipe;
4828     fragment_pipe->get_caps(&device->adapter->gl_info, &fragment_caps);
4829     priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL;
4830 
4831     device->vertex_priv = vertex_priv;
4832     device->fragment_priv = fragment_priv;
4833     device->shader_priv = priv;
4834 
4835     return WINED3D_OK;
4836 }
4837 
4838 static void release_signature(struct wine_rb_entry *entry, void *context)
4839 {
4840     struct ps_signature *sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
4841     unsigned int i;
4842 
4843     for (i = 0; i < sig->sig.element_count; ++i)
4844     {
4845         HeapFree(GetProcessHeap(), 0, (char *)sig->sig.elements[i].semantic_name);
4846     }
4847     HeapFree(GetProcessHeap(), 0, sig->sig.elements);
4848     HeapFree(GetProcessHeap(), 0, sig);
4849 }
4850 
4851 /* Context activation is done by the caller. */
4852 static void shader_arb_free(struct wined3d_device *device)
4853 {
4854     struct shader_arb_priv *priv = device->shader_priv;
4855 
4856     wine_rb_destroy(&priv->signature_tree, release_signature, NULL);
4857     priv->fragment_pipe->free_private(device);
4858     priv->vertex_pipe->vp_free(device);
4859     HeapFree(GetProcessHeap(), 0, device->shader_priv);
4860 }
4861 
4862 static BOOL shader_arb_allocate_context_data(struct wined3d_context *context)
4863 {
4864     return TRUE;
4865 }
4866 
4867 static void shader_arb_free_context_data(struct wined3d_context *context)
4868 {
4869     struct shader_arb_priv *priv;
4870 
4871     priv = context->device->shader_priv;
4872     if (priv->last_context == context)
4873         priv->last_context = NULL;
4874 }
4875 
/* Context state initialisation hook. Intentionally empty in this
 * implementation. */
static void shader_arb_init_context_state(struct wined3d_context *context)
{
}
4877 
4878 static void shader_arb_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps)
4879 {
4880     if (gl_info->supported[ARB_VERTEX_PROGRAM])
4881     {
4882         DWORD vs_consts;
4883         UINT vs_version;
4884 
4885         /* 96 is the minimum allowed value of MAX_PROGRAM_ENV_PARAMETERS_ARB
4886          * for vertex programs. If the native limit is less than that it's
4887          * not very useful, and e.g. Mesa swrast returns 0, probably to
4888          * indicate it's a software implementation. */
4889         if (gl_info->limits.arb_vs_native_constants < 96)
4890             vs_consts = gl_info->limits.arb_vs_float_constants;
4891         else
4892             vs_consts = min(gl_info->limits.arb_vs_float_constants, gl_info->limits.arb_vs_native_constants);
4893 
4894         if (gl_info->supported[NV_VERTEX_PROGRAM3])
4895         {
4896             vs_version = 3;
4897             TRACE("Hardware vertex shader version 3.0 enabled (NV_VERTEX_PROGRAM3)\n");
4898         }
4899         else if (vs_consts >= 256)
4900         {
4901             /* Shader Model 2.0 requires at least 256 vertex shader constants */
4902             vs_version = 2;
4903             TRACE("Hardware vertex shader version 2.0 enabled (ARB_PROGRAM)\n");
4904         }
4905         else
4906         {
4907             vs_version = 1;
4908             TRACE("Hardware vertex shader version 1.1 enabled (ARB_PROGRAM)\n");
4909         }
4910         caps->vs_version = min(wined3d_settings.max_sm_vs, vs_version);
4911         caps->vs_uniform_count = min(WINED3D_MAX_VS_CONSTS_F, vs_consts);
4912     }
4913     else
4914     {
4915         caps->vs_version = 0;
4916         caps->vs_uniform_count = 0;
4917     }
4918 
4919     caps->hs_version = 0;
4920     caps->ds_version = 0;
4921     caps->gs_version = 0;
4922     caps->cs_version = 0;
4923 
4924     if (gl_info->supported[ARB_FRAGMENT_PROGRAM])
4925     {
4926         DWORD ps_consts;
4927         UINT ps_version;
4928 
4929         /* Similar as above for vertex programs, but the minimum for fragment
4930          * programs is 24. */
4931         if (gl_info->limits.arb_ps_native_constants < 24)
4932             ps_consts = gl_info->limits.arb_ps_float_constants;
4933         else
4934             ps_consts = min(gl_info->limits.arb_ps_float_constants, gl_info->limits.arb_ps_native_constants);
4935 
4936         if (gl_info->supported[NV_FRAGMENT_PROGRAM2])
4937         {
4938             ps_version = 3;
4939             TRACE("Hardware pixel shader version 3.0 enabled (NV_FRAGMENT_PROGRAM2)\n");
4940         }
4941         else if (ps_consts >= 32)
4942         {
4943             /* Shader Model 2.0 requires at least 32 pixel shader constants */
4944             ps_version = 2;
4945             TRACE("Hardware pixel shader version 2.0 enabled (ARB_PROGRAM)\n");
4946         }
4947         else
4948         {
4949             ps_version = 1;
4950             TRACE("Hardware pixel shader version 1.4 enabled (ARB_PROGRAM)\n");
4951         }
4952         caps->ps_version = min(wined3d_settings.max_sm_ps, ps_version);
4953         caps->ps_uniform_count = min(WINED3D_MAX_PS_CONSTS_F, ps_consts);
4954         caps->ps_1x_max_value = 8.0f;
4955     }
4956     else
4957     {
4958         caps->ps_version = 0;
4959         caps->ps_uniform_count = 0;
4960         caps->ps_1x_max_value = 0.0f;
4961     }
4962 
4963     caps->varying_count = 0;
4964     caps->wined3d_caps = WINED3D_SHADER_CAP_SRGB_WRITE;
4965     if (use_nv_clip(gl_info))
4966         caps->wined3d_caps |= WINED3D_SHADER_CAP_VS_CLIPPING;
4967 }
4968 
4969 static BOOL shader_arb_color_fixup_supported(struct color_fixup_desc fixup)
4970 {
4971     /* We support everything except complex conversions. */
4972     return !is_complex_fixup(fixup);
4973 }
4974 
4975 static void shader_arb_add_instruction_modifiers(const struct wined3d_shader_instruction *ins) {
4976     DWORD shift;
4977     char write_mask[20], regstr[50];
4978     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
4979     BOOL is_color = FALSE;
4980     const struct wined3d_shader_dst_param *dst;
4981 
4982     if (!ins->dst_count) return;
4983 
4984     dst = &ins->dst[0];
4985     shift = dst->shift;
4986     if (!shift) return; /* Saturate alone is handled by the instructions */
4987 
4988     shader_arb_get_write_mask(ins, dst, write_mask);
4989     shader_arb_get_register_name(ins, &dst->reg, regstr, &is_color);
4990 
4991     /* Generate a line that does the output modifier computation
4992      * FIXME: _SAT vs shift? _SAT alone is already handled in the instructions, if this
4993      * maps problems in e.g. _d4_sat modify shader_arb_get_modifier
4994      */
4995     shader_addline(buffer, "MUL%s %s%s, %s, %s;\n", shader_arb_get_modifier(ins),
4996                    regstr, write_mask, regstr, shift_tab[shift]);
4997 }
4998 
/* Per-opcode dispatch table of the ARB program backend.
 *
 * shader_arb_handle_instruction() indexes this array with ins->handler_idx
 * and calls the function it finds. The initializer is positional, so the
 * entries are expected to stay in the same order as the WINED3DSIH_* values
 * named in the comment in front of each entry (the enumeration itself is
 * declared elsewhere).
 *
 * A NULL entry means this backend has no code generator for that opcode;
 * shader_arb_handle_instruction() reports such opcodes with a FIXME and
 * emits nothing. WINED3DSIH_IF is NULL on purpose: the boolean "if" is
 * resolved inside shader_arb_handle_instruction() before the table is
 * consulted. */
static const SHADER_HANDLER shader_arb_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
{
    /* WINED3DSIH_ABS                              */ shader_hw_map2gl,
    /* WINED3DSIH_ADD                              */ shader_hw_map2gl,
    /* WINED3DSIH_AND                              */ NULL,
    /* WINED3DSIH_ATOMIC_AND                       */ NULL,
    /* WINED3DSIH_ATOMIC_CMP_STORE                 */ NULL,
    /* WINED3DSIH_ATOMIC_IADD                      */ NULL,
    /* WINED3DSIH_ATOMIC_IMAX                      */ NULL,
    /* WINED3DSIH_ATOMIC_IMIN                      */ NULL,
    /* WINED3DSIH_ATOMIC_OR                        */ NULL,
    /* WINED3DSIH_ATOMIC_UMAX                      */ NULL,
    /* WINED3DSIH_ATOMIC_UMIN                      */ NULL,
    /* WINED3DSIH_ATOMIC_XOR                       */ NULL,
    /* WINED3DSIH_BEM                              */ pshader_hw_bem,
    /* WINED3DSIH_BFI                              */ NULL,
    /* WINED3DSIH_BFREV                            */ NULL,
    /* WINED3DSIH_BREAK                            */ shader_hw_break,
    /* WINED3DSIH_BREAKC                           */ shader_hw_breakc,
    /* WINED3DSIH_BREAKP                           */ NULL,
    /* WINED3DSIH_BUFINFO                          */ NULL,
    /* WINED3DSIH_CALL                             */ shader_hw_call,
    /* WINED3DSIH_CALLNZ                           */ NULL,
    /* WINED3DSIH_CASE                             */ NULL,
    /* WINED3DSIH_CMP                              */ pshader_hw_cmp,
    /* WINED3DSIH_CND                              */ pshader_hw_cnd,
    /* WINED3DSIH_CONTINUE                         */ NULL,
    /* WINED3DSIH_CONTINUEP                        */ NULL,
    /* WINED3DSIH_COUNTBITS                        */ NULL,
    /* WINED3DSIH_CRS                              */ shader_hw_map2gl,
    /* WINED3DSIH_CUT                              */ NULL,
    /* WINED3DSIH_CUT_STREAM                       */ NULL,
    /* WINED3DSIH_DCL                              */ shader_hw_nop,
    /* WINED3DSIH_DCL_CONSTANT_BUFFER              */ shader_hw_nop,
    /* WINED3DSIH_DCL_FUNCTION_BODY                */ NULL,
    /* WINED3DSIH_DCL_FUNCTION_TABLE               */ NULL,
    /* WINED3DSIH_DCL_GLOBAL_FLAGS                 */ NULL,
    /* WINED3DSIH_DCL_GS_INSTANCES                 */ NULL,
    /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */ NULL,
    /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */ NULL,
    /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR            */ NULL,
    /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER    */ NULL,
    /* WINED3DSIH_DCL_INDEX_RANGE                  */ NULL,
    /* WINED3DSIH_DCL_INDEXABLE_TEMP               */ NULL,
    /* WINED3DSIH_DCL_INPUT                        */ NULL,
    /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT    */ NULL,
    /* WINED3DSIH_DCL_INPUT_PRIMITIVE              */ shader_hw_nop,
    /* WINED3DSIH_DCL_INPUT_PS                     */ NULL,
    /* WINED3DSIH_DCL_INPUT_PS_SGV                 */ NULL,
    /* WINED3DSIH_DCL_INPUT_PS_SIV                 */ NULL,
    /* WINED3DSIH_DCL_INPUT_SGV                    */ NULL,
    /* WINED3DSIH_DCL_INPUT_SIV                    */ NULL,
    /* WINED3DSIH_DCL_INTERFACE                    */ NULL,
    /* WINED3DSIH_DCL_OUTPUT                       */ NULL,
    /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT   */ NULL,
    /* WINED3DSIH_DCL_OUTPUT_SIV                   */ NULL,
    /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY              */ shader_hw_nop,
    /* WINED3DSIH_DCL_RESOURCE_RAW                 */ NULL,
    /* WINED3DSIH_DCL_RESOURCE_STRUCTURED          */ NULL,
    /* WINED3DSIH_DCL_SAMPLER                      */ NULL,
    /* WINED3DSIH_DCL_STREAM                       */ NULL,
    /* WINED3DSIH_DCL_TEMPS                        */ NULL,
    /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN           */ NULL,
    /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */ NULL,
    /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING     */ NULL,
    /* WINED3DSIH_DCL_TGSM_RAW                     */ NULL,
    /* WINED3DSIH_DCL_TGSM_STRUCTURED              */ NULL,
    /* WINED3DSIH_DCL_THREAD_GROUP                 */ NULL,
    /* WINED3DSIH_DCL_UAV_RAW                      */ NULL,
    /* WINED3DSIH_DCL_UAV_STRUCTURED               */ NULL,
    /* WINED3DSIH_DCL_UAV_TYPED                    */ NULL,
    /* WINED3DSIH_DCL_VERTICES_OUT                 */ shader_hw_nop,
    /* WINED3DSIH_DEF                              */ shader_hw_nop,
    /* WINED3DSIH_DEFAULT                          */ NULL,
    /* WINED3DSIH_DEFB                             */ shader_hw_nop,
    /* WINED3DSIH_DEFI                             */ shader_hw_nop,
    /* WINED3DSIH_DIV                              */ NULL,
    /* WINED3DSIH_DP2                              */ NULL,
    /* WINED3DSIH_DP2ADD                           */ pshader_hw_dp2add,
    /* WINED3DSIH_DP3                              */ shader_hw_map2gl,
    /* WINED3DSIH_DP4                              */ shader_hw_map2gl,
    /* WINED3DSIH_DST                              */ shader_hw_map2gl,
    /* WINED3DSIH_DSX                              */ shader_hw_map2gl,
    /* WINED3DSIH_DSX_COARSE                       */ NULL,
    /* WINED3DSIH_DSX_FINE                         */ NULL,
    /* WINED3DSIH_DSY                              */ shader_hw_dsy,
    /* WINED3DSIH_DSY_COARSE                       */ NULL,
    /* WINED3DSIH_DSY_FINE                         */ NULL,
    /* WINED3DSIH_EVAL_SAMPLE_INDEX                */ NULL,
    /* WINED3DSIH_ELSE                             */ shader_hw_else,
    /* WINED3DSIH_EMIT                             */ NULL,
    /* WINED3DSIH_EMIT_STREAM                      */ NULL,
    /* WINED3DSIH_ENDIF                            */ shader_hw_endif,
    /* WINED3DSIH_ENDLOOP                          */ shader_hw_endloop,
    /* WINED3DSIH_ENDREP                           */ shader_hw_endrep,
    /* WINED3DSIH_ENDSWITCH                        */ NULL,
    /* WINED3DSIH_EQ                               */ NULL,
    /* WINED3DSIH_EXP                              */ shader_hw_scalar_op,
    /* WINED3DSIH_EXPP                             */ shader_hw_scalar_op,
    /* WINED3DSIH_F16TOF32                         */ NULL,
    /* WINED3DSIH_F32TOF16                         */ NULL,
    /* WINED3DSIH_FCALL                            */ NULL,
    /* WINED3DSIH_FIRSTBIT_HI                      */ NULL,
    /* WINED3DSIH_FIRSTBIT_LO                      */ NULL,
    /* WINED3DSIH_FIRSTBIT_SHI                     */ NULL,
    /* WINED3DSIH_FRC                              */ shader_hw_map2gl,
    /* WINED3DSIH_FTOI                             */ NULL,
    /* WINED3DSIH_FTOU                             */ NULL,
    /* WINED3DSIH_GATHER4                          */ NULL,
    /* WINED3DSIH_GATHER4_C                        */ NULL,
    /* WINED3DSIH_GATHER4_PO                       */ NULL,
    /* WINED3DSIH_GATHER4_PO_C                     */ NULL,
    /* WINED3DSIH_GE                               */ NULL,
    /* WINED3DSIH_HS_CONTROL_POINT_PHASE           */ NULL,
    /* WINED3DSIH_HS_DECLS                         */ NULL,
    /* WINED3DSIH_HS_FORK_PHASE                    */ NULL,
    /* WINED3DSIH_HS_JOIN_PHASE                    */ NULL,
    /* WINED3DSIH_IADD                             */ NULL,
    /* WINED3DSIH_IBFE                             */ NULL,
    /* WINED3DSIH_IEQ                              */ NULL,
    /* WINED3DSIH_IF                               */ NULL /* Hardcoded into the shader */,
    /* WINED3DSIH_IFC                              */ shader_hw_ifc,
    /* WINED3DSIH_IGE                              */ NULL,
    /* WINED3DSIH_ILT                              */ NULL,
    /* WINED3DSIH_IMAD                             */ NULL,
    /* WINED3DSIH_IMAX                             */ NULL,
    /* WINED3DSIH_IMIN                             */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_ALLOC                 */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_AND                   */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH              */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_CONSUME               */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_EXCH                  */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_IADD                  */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_IMAX                  */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_IMIN                  */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_OR                    */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_UMAX                  */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_UMIN                  */ NULL,
    /* WINED3DSIH_IMM_ATOMIC_XOR                   */ NULL,
    /* WINED3DSIH_IMUL                             */ NULL,
    /* WINED3DSIH_INE                              */ NULL,
    /* WINED3DSIH_INEG                             */ NULL,
    /* WINED3DSIH_ISHL                             */ NULL,
    /* WINED3DSIH_ISHR                             */ NULL,
    /* WINED3DSIH_ITOF                             */ NULL,
    /* WINED3DSIH_LABEL                            */ shader_hw_label,
    /* WINED3DSIH_LD                               */ NULL,
    /* WINED3DSIH_LD2DMS                           */ NULL,
    /* WINED3DSIH_LD_RAW                           */ NULL,
    /* WINED3DSIH_LD_STRUCTURED                    */ NULL,
    /* WINED3DSIH_LD_UAV_TYPED                     */ NULL,
    /* WINED3DSIH_LIT                              */ shader_hw_map2gl,
    /* WINED3DSIH_LOD                              */ NULL,
    /* WINED3DSIH_LOG                              */ shader_hw_scalar_op,
    /* WINED3DSIH_LOGP                             */ shader_hw_scalar_op,
    /* WINED3DSIH_LOOP                             */ shader_hw_loop,
    /* WINED3DSIH_LRP                              */ shader_hw_lrp,
    /* WINED3DSIH_LT                               */ NULL,
    /* WINED3DSIH_M3x2                             */ shader_hw_mnxn,
    /* WINED3DSIH_M3x3                             */ shader_hw_mnxn,
    /* WINED3DSIH_M3x4                             */ shader_hw_mnxn,
    /* WINED3DSIH_M4x3                             */ shader_hw_mnxn,
    /* WINED3DSIH_M4x4                             */ shader_hw_mnxn,
    /* WINED3DSIH_MAD                              */ shader_hw_map2gl,
    /* WINED3DSIH_MAX                              */ shader_hw_map2gl,
    /* WINED3DSIH_MIN                              */ shader_hw_map2gl,
    /* WINED3DSIH_MOV                              */ shader_hw_mov,
    /* WINED3DSIH_MOVA                             */ shader_hw_mov,
    /* WINED3DSIH_MOVC                             */ NULL,
    /* WINED3DSIH_MUL                              */ shader_hw_map2gl,
    /* WINED3DSIH_NE                               */ NULL,
    /* WINED3DSIH_NOP                              */ shader_hw_nop,
    /* WINED3DSIH_NOT                              */ NULL,
    /* WINED3DSIH_NRM                              */ shader_hw_nrm,
    /* WINED3DSIH_OR                               */ NULL,
    /* WINED3DSIH_PHASE                            */ shader_hw_nop,
    /* WINED3DSIH_POW                              */ shader_hw_pow,
    /* WINED3DSIH_RCP                              */ shader_hw_scalar_op,
    /* WINED3DSIH_REP                              */ shader_hw_rep,
    /* WINED3DSIH_RESINFO                          */ NULL,
    /* WINED3DSIH_RET                              */ shader_hw_ret,
    /* WINED3DSIH_RETP                             */ NULL,
    /* WINED3DSIH_ROUND_NE                         */ NULL,
    /* WINED3DSIH_ROUND_NI                         */ NULL,
    /* WINED3DSIH_ROUND_PI                         */ NULL,
    /* WINED3DSIH_ROUND_Z                          */ NULL,
    /* WINED3DSIH_RSQ                              */ shader_hw_scalar_op,
    /* WINED3DSIH_SAMPLE                           */ NULL,
    /* WINED3DSIH_SAMPLE_B                         */ NULL,
    /* WINED3DSIH_SAMPLE_C                         */ NULL,
    /* WINED3DSIH_SAMPLE_C_LZ                      */ NULL,
    /* WINED3DSIH_SAMPLE_GRAD                      */ NULL,
    /* WINED3DSIH_SAMPLE_INFO                      */ NULL,
    /* WINED3DSIH_SAMPLE_LOD                       */ NULL,
    /* WINED3DSIH_SAMPLE_POS                       */ NULL,
    /* WINED3DSIH_SETP                             */ NULL,
    /* WINED3DSIH_SGE                              */ shader_hw_map2gl,
    /* WINED3DSIH_SGN                              */ shader_hw_sgn,
    /* WINED3DSIH_SINCOS                           */ shader_hw_sincos,
    /* WINED3DSIH_SLT                              */ shader_hw_map2gl,
    /* WINED3DSIH_SQRT                             */ NULL,
    /* WINED3DSIH_STORE_RAW                        */ NULL,
    /* WINED3DSIH_STORE_STRUCTURED                 */ NULL,
    /* WINED3DSIH_STORE_UAV_TYPED                  */ NULL,
    /* WINED3DSIH_SUB                              */ shader_hw_map2gl,
    /* WINED3DSIH_SWAPC                            */ NULL,
    /* WINED3DSIH_SWITCH                           */ NULL,
    /* WINED3DSIH_SYNC                             */ NULL,
    /* WINED3DSIH_TEX                              */ pshader_hw_tex,
    /* WINED3DSIH_TEXBEM                           */ pshader_hw_texbem,
    /* WINED3DSIH_TEXBEML                          */ pshader_hw_texbem,
    /* WINED3DSIH_TEXCOORD                         */ pshader_hw_texcoord,
    /* WINED3DSIH_TEXDEPTH                         */ pshader_hw_texdepth,
    /* WINED3DSIH_TEXDP3                           */ pshader_hw_texdp3,
    /* WINED3DSIH_TEXDP3TEX                        */ pshader_hw_texdp3tex,
    /* WINED3DSIH_TEXKILL                          */ pshader_hw_texkill,
    /* WINED3DSIH_TEXLDD                           */ shader_hw_texldd,
    /* WINED3DSIH_TEXLDL                           */ shader_hw_texldl,
    /* WINED3DSIH_TEXM3x2DEPTH                     */ pshader_hw_texm3x2depth,
    /* WINED3DSIH_TEXM3x2PAD                       */ pshader_hw_texm3x2pad,
    /* WINED3DSIH_TEXM3x2TEX                       */ pshader_hw_texm3x2tex,
    /* WINED3DSIH_TEXM3x3                          */ pshader_hw_texm3x3,
    /* WINED3DSIH_TEXM3x3DIFF                      */ NULL,
    /* WINED3DSIH_TEXM3x3PAD                       */ pshader_hw_texm3x3pad,
    /* WINED3DSIH_TEXM3x3SPEC                      */ pshader_hw_texm3x3spec,
    /* WINED3DSIH_TEXM3x3TEX                       */ pshader_hw_texm3x3tex,
    /* WINED3DSIH_TEXM3x3VSPEC                     */ pshader_hw_texm3x3vspec,
    /* WINED3DSIH_TEXREG2AR                        */ pshader_hw_texreg2ar,
    /* WINED3DSIH_TEXREG2GB                        */ pshader_hw_texreg2gb,
    /* WINED3DSIH_TEXREG2RGB                       */ pshader_hw_texreg2rgb,
    /* WINED3DSIH_UBFE                             */ NULL,
    /* WINED3DSIH_UDIV                             */ NULL,
    /* WINED3DSIH_UGE                              */ NULL,
    /* WINED3DSIH_ULT                              */ NULL,
    /* WINED3DSIH_UMAX                             */ NULL,
    /* WINED3DSIH_UMIN                             */ NULL,
    /* WINED3DSIH_UMUL                             */ NULL,
    /* WINED3DSIH_USHR                             */ NULL,
    /* WINED3DSIH_UTOF                             */ NULL,
    /* WINED3DSIH_XOR                              */ NULL,
};
5240 
5241 static BOOL get_bool_const(const struct wined3d_shader_instruction *ins,
5242         const struct wined3d_shader *shader, DWORD idx)
5243 {
5244     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
5245     BOOL vshader = shader_is_vshader_version(reg_maps->shader_version.type);
5246     const struct wined3d_shader_lconst *constant;
5247     WORD bools = 0;
5248     WORD flag = (1u << idx);
5249     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
5250 
5251     if (reg_maps->local_bool_consts & flag)
5252     {
5253         /* What good is an if(bool) with a hardcoded local constant? I don't know, but handle it */
5254         LIST_FOR_EACH_ENTRY(constant, &shader->constantsB, struct wined3d_shader_lconst, entry)
5255         {
5256             if (constant->idx == idx)
5257             {
5258                 return constant->value[0];
5259             }
5260         }
5261         ERR("Local constant not found\n");
5262         return FALSE;
5263     }
5264     else
5265     {
5266         if(vshader) bools = priv->cur_vs_args->clip.boolclip.bools;
5267         else bools = priv->cur_ps_args->bools;
5268         return bools & flag;
5269     }
5270 }
5271 
5272 static void get_loop_control_const(const struct wined3d_shader_instruction *ins,
5273         const struct wined3d_shader *shader, UINT idx, struct wined3d_shader_loop_control *loop_control)
5274 {
5275     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
5276     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
5277 
5278     /* Integer constants can either be a local constant, or they can be stored in the shader
5279      * type specific compile args. */
5280     if (reg_maps->local_int_consts & (1u << idx))
5281     {
5282         const struct wined3d_shader_lconst *constant;
5283 
5284         LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
5285         {
5286             if (constant->idx == idx)
5287             {
5288                 loop_control->count = constant->value[0];
5289                 loop_control->start = constant->value[1];
5290                 /* Step is signed. */
5291                 loop_control->step = (int)constant->value[2];
5292                 return;
5293             }
5294         }
5295         /* If this happens the flag was set incorrectly */
5296         ERR("Local constant not found\n");
5297         loop_control->count = 0;
5298         loop_control->start = 0;
5299         loop_control->step = 0;
5300         return;
5301     }
5302 
5303     switch (reg_maps->shader_version.type)
5304     {
5305         case WINED3D_SHADER_TYPE_VERTEX:
5306             /* Count and aL start value are unsigned */
5307             loop_control->count = priv->cur_vs_args->loop_ctrl[idx][0];
5308             loop_control->start = priv->cur_vs_args->loop_ctrl[idx][1];
5309             /* Step is signed. */
5310             loop_control->step = ((char)priv->cur_vs_args->loop_ctrl[idx][2]);
5311             break;
5312 
5313         case WINED3D_SHADER_TYPE_PIXEL:
5314             loop_control->count = priv->cur_ps_args->loop_ctrl[idx][0];
5315             loop_control->start = priv->cur_ps_args->loop_ctrl[idx][1];
5316             loop_control->step = ((char)priv->cur_ps_args->loop_ctrl[idx][2]);
5317             break;
5318 
5319         default:
5320             FIXME("Unhandled shader type %#x.\n", reg_maps->shader_version.type);
5321             break;
5322     }
5323 }
5324 
5325 static void record_instruction(struct list *list, const struct wined3d_shader_instruction *ins)
5326 {
5327     unsigned int i;
5328     struct wined3d_shader_dst_param *dst_param;
5329     struct wined3d_shader_src_param *src_param = NULL, *rel_addr;
5330     struct recorded_instruction *rec = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*rec));
5331     if(!rec)
5332     {
5333         ERR("Out of memory\n");
5334         return;
5335     }
5336 
5337     rec->ins = *ins;
5338     dst_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*dst_param));
5339     if(!dst_param) goto free;
5340     *dst_param = *ins->dst;
5341     if (ins->dst->reg.idx[0].rel_addr)
5342     {
5343         rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*rel_addr));
5344         if (!rel_addr)
5345             goto free;
5346         *rel_addr = *ins->dst->reg.idx[0].rel_addr;
5347         dst_param->reg.idx[0].rel_addr = rel_addr;
5348     }
5349     rec->ins.dst = dst_param;
5350 
5351     if (!(src_param = wined3d_calloc(ins->src_count, sizeof(*src_param))))
5352         goto free;
5353     for (i = 0; i < ins->src_count; ++i)
5354     {
5355         src_param[i] = ins->src[i];
5356         if (ins->src[i].reg.idx[0].rel_addr)
5357         {
5358             rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*rel_addr));
5359             if (!rel_addr)
5360                 goto free;
5361             *rel_addr = *ins->src[i].reg.idx[0].rel_addr;
5362             src_param[i].reg.idx[0].rel_addr = rel_addr;
5363         }
5364     }
5365     rec->ins.src = src_param;
5366     list_add_tail(list, &rec->entry);
5367     return;
5368 
5369 free:
5370     ERR("Out of memory\n");
5371     if(dst_param)
5372     {
5373         HeapFree(GetProcessHeap(), 0, (void *)dst_param->reg.idx[0].rel_addr);
5374         HeapFree(GetProcessHeap(), 0, dst_param);
5375     }
5376     if(src_param)
5377     {
5378         for(i = 0; i < ins->src_count; i++)
5379         {
5380             HeapFree(GetProcessHeap(), 0, (void *)src_param[i].reg.idx[0].rel_addr);
5381         }
5382         HeapFree(GetProcessHeap(), 0, src_param);
5383     }
5384     HeapFree(GetProcessHeap(), 0, rec);
5385 }
5386 
5387 static void free_recorded_instruction(struct list *list)
5388 {
5389     struct recorded_instruction *rec_ins, *entry2;
5390     unsigned int i;
5391 
5392     LIST_FOR_EACH_ENTRY_SAFE(rec_ins, entry2, list, struct recorded_instruction, entry)
5393     {
5394         list_remove(&rec_ins->entry);
5395         if (rec_ins->ins.dst)
5396         {
5397             HeapFree(GetProcessHeap(), 0, (void *)rec_ins->ins.dst->reg.idx[0].rel_addr);
5398             HeapFree(GetProcessHeap(), 0, (void *)rec_ins->ins.dst);
5399         }
5400         if (rec_ins->ins.src)
5401         {
5402             for (i = 0; i < rec_ins->ins.src_count; ++i)
5403             {
5404                 HeapFree(GetProcessHeap(), 0, (void *)rec_ins->ins.src[i].reg.idx[0].rel_addr);
5405             }
5406             HeapFree(GetProcessHeap(), 0, (void *)rec_ins->ins.src);
5407         }
5408         HeapFree(GetProcessHeap(), 0, rec_ins);
5409     }
5410 }
5411 
5412 static void pop_control_frame(const struct wined3d_shader_instruction *ins)
5413 {
5414     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
5415     struct control_frame *control_frame;
5416 
5417     if (ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP)
5418     {
5419         struct list *e = list_head(&priv->control_frames);
5420         control_frame = LIST_ENTRY(e, struct control_frame, entry);
5421         list_remove(&control_frame->entry);
5422         HeapFree(GetProcessHeap(), 0, control_frame);
5423         priv->loop_depth--;
5424     }
5425     else if (ins->handler_idx == WINED3DSIH_ENDIF)
5426     {
5427         /* Non-ifc ENDIFs were already handled previously. */
5428         struct list *e = list_head(&priv->control_frames);
5429         control_frame = LIST_ENTRY(e, struct control_frame, entry);
5430         list_remove(&control_frame->entry);
5431         HeapFree(GetProcessHeap(), 0, control_frame);
5432     }
5433 }
5434 
5435 static void shader_arb_handle_instruction(const struct wined3d_shader_instruction *ins) {
5436     SHADER_HANDLER hw_fct;
5437     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
5438     const struct wined3d_shader *shader = ins->ctx->shader;
5439     struct control_frame *control_frame;
5440     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
5441     BOOL bool_const;
5442 
5443     if(ins->handler_idx == WINED3DSIH_LOOP || ins->handler_idx == WINED3DSIH_REP)
5444     {
5445         control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame));
5446         list_add_head(&priv->control_frames, &control_frame->entry);
5447 
5448         if(ins->handler_idx == WINED3DSIH_LOOP) control_frame->type = LOOP;
5449         if(ins->handler_idx == WINED3DSIH_REP) control_frame->type = REP;
5450 
5451         if(priv->target_version >= NV2)
5452         {
5453             control_frame->no.loop = priv->num_loops++;
5454             priv->loop_depth++;
5455         }
5456         else
5457         {
5458             /* Don't bother recording when we're in a not used if branch */
5459             if(priv->muted)
5460             {
5461                 return;
5462             }
5463 
5464             if(!priv->recording)
5465             {
5466                 list_init(&priv->record);
5467                 priv->recording = TRUE;
5468                 control_frame->outer_loop = TRUE;
5469                 get_loop_control_const(ins, shader, ins->src[0].reg.idx[0].offset, &control_frame->loop_control);
5470                 return; /* Instruction is handled */
5471             }
5472             /* Record this loop in the outer loop's recording */
5473         }
5474     }
5475     else if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP)
5476     {
5477         if(priv->target_version >= NV2)
5478         {
5479             /* Nothing to do. The control frame is popped after the HW instr handler */
5480         }
5481         else
5482         {
5483             struct list *e = list_head(&priv->control_frames);
5484             control_frame = LIST_ENTRY(e, struct control_frame, entry);
5485             list_remove(&control_frame->entry);
5486 
5487             if(control_frame->outer_loop)
5488             {
5489                 unsigned int iteration;
5490                 int aL = 0;
5491                 struct list copy;
5492 
5493                 /* Turn off recording before playback */
5494                 priv->recording = FALSE;
5495 
5496                 /* Move the recorded instructions to a separate list and get them out of the private data
5497                  * structure. If there are nested loops, the shader_arb_handle_instruction below will
5498                  * be recorded again, thus priv->record might be overwritten
5499                  */
5500                 list_init(&copy);
5501                 list_move_tail(&copy, &priv->record);
5502                 list_init(&priv->record);
5503 
5504                 if(ins->handler_idx == WINED3DSIH_ENDLOOP)
5505                 {
5506                     shader_addline(buffer, "#unrolling loop: %u iterations, aL=%u, inc %d\n",
5507                                    control_frame->loop_control.count, control_frame->loop_control.start,
5508                                    control_frame->loop_control.step);
5509                     aL = control_frame->loop_control.start;
5510                 }
5511                 else
5512                 {
5513                     shader_addline(buffer, "#unrolling rep: %u iterations\n", control_frame->loop_control.count);
5514                 }
5515 
5516                 for (iteration = 0; iteration < control_frame->loop_control.count; ++iteration)
5517                 {
5518                     struct recorded_instruction *rec_ins;
5519                     if(ins->handler_idx == WINED3DSIH_ENDLOOP)
5520                     {
5521                         priv->aL = aL;
5522                         shader_addline(buffer, "#Iteration %u, aL=%d\n", iteration, aL);
5523                     }
5524                     else
5525                     {
5526                         shader_addline(buffer, "#Iteration %u\n", iteration);
5527                     }
5528 
5529                     LIST_FOR_EACH_ENTRY(rec_ins, &copy, struct recorded_instruction, entry)
5530                     {
5531                         shader_arb_handle_instruction(&rec_ins->ins);
5532                     }
5533 
5534                     if(ins->handler_idx == WINED3DSIH_ENDLOOP)
5535                     {
5536                         aL += control_frame->loop_control.step;
5537                     }
5538                 }
5539                 shader_addline(buffer, "#end loop/rep\n");
5540 
5541                 free_recorded_instruction(&copy);
5542                 HeapFree(GetProcessHeap(), 0, control_frame);
5543                 return; /* Instruction is handled */
5544             }
5545             else
5546             {
5547                 /* This is a nested loop. Proceed to the normal recording function */
5548                 HeapFree(GetProcessHeap(), 0, control_frame);
5549             }
5550         }
5551     }
5552 
5553     if(priv->recording)
5554     {
5555         record_instruction(&priv->record, ins);
5556         return;
5557     }
5558 
5559     /* boolean if */
5560     if(ins->handler_idx == WINED3DSIH_IF)
5561     {
5562         control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame));
5563         list_add_head(&priv->control_frames, &control_frame->entry);
5564         control_frame->type = IF;
5565 
5566         bool_const = get_bool_const(ins, shader, ins->src[0].reg.idx[0].offset);
5567         if (ins->src[0].modifiers == WINED3DSPSM_NOT)
5568             bool_const = !bool_const;
5569         if (!priv->muted && !bool_const)
5570         {
5571             shader_addline(buffer, "#if(FALSE){\n");
5572             priv->muted = TRUE;
5573             control_frame->muting = TRUE;
5574         }
5575         else shader_addline(buffer, "#if(TRUE) {\n");
5576 
5577         return; /* Instruction is handled */
5578     }
5579     else if(ins->handler_idx == WINED3DSIH_IFC)
5580     {
5581         /* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */
5582         control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame));
5583         control_frame->type = IFC;
5584         control_frame->no.ifc = priv->num_ifcs++;
5585         list_add_head(&priv->control_frames, &control_frame->entry);
5586     }
5587     else if(ins->handler_idx == WINED3DSIH_ELSE)
5588     {
5589         struct list *e = list_head(&priv->control_frames);
5590         control_frame = LIST_ENTRY(e, struct control_frame, entry);
5591 
5592         if(control_frame->type == IF)
5593         {
5594             shader_addline(buffer, "#} else {\n");
5595             if(!priv->muted && !control_frame->muting)
5596             {
5597                 priv->muted = TRUE;
5598                 control_frame->muting = TRUE;
5599             }
5600             else if(control_frame->muting) priv->muted = FALSE;
5601             return; /* Instruction is handled. */
5602         }
5603         /* In case of an ifc, generate a HW shader instruction */
5604         if (control_frame->type != IFC)
5605             ERR("Control frame does not match.\n");
5606     }
5607     else if(ins->handler_idx == WINED3DSIH_ENDIF)
5608     {
5609         struct list *e = list_head(&priv->control_frames);
5610         control_frame = LIST_ENTRY(e, struct control_frame, entry);
5611 
5612         if(control_frame->type == IF)
5613         {
5614             shader_addline(buffer, "#} endif\n");
5615             if(control_frame->muting) priv->muted = FALSE;
5616             list_remove(&control_frame->entry);
5617             HeapFree(GetProcessHeap(), 0, control_frame);
5618             return; /* Instruction is handled */
5619         }
5620         /* In case of an ifc, generate a HW shader instruction */
5621         if (control_frame->type != IFC)
5622             ERR("Control frame does not match.\n");
5623     }
5624 
5625     if(priv->muted)
5626     {
5627         pop_control_frame(ins);
5628         return;
5629     }
5630 
5631     /* Select handler */
5632     hw_fct = shader_arb_instruction_handler_table[ins->handler_idx];
5633 
5634     /* Unhandled opcode */
5635     if (!hw_fct)
5636     {
5637         FIXME("Backend can't handle opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
5638         return;
5639     }
5640     hw_fct(ins);
5641 
5642     pop_control_frame(ins);
5643 
5644     shader_arb_add_instruction_modifiers(ins);
5645 }
5646 
5647 static BOOL shader_arb_has_ffp_proj_control(void *shader_priv)
5648 {
5649     struct shader_arb_priv *priv = shader_priv;
5650 
5651     return priv->ffp_proj_control;
5652 }
5653 
/* Precompile hook of the backend operations table. This backend does not do
 * any work here; both arguments are ignored. */
static void shader_arb_precompile(void *shader_priv, struct wined3d_shader *shader)
{
}
5655 
/* Operations table of the ARB program shader backend.
 *
 * The initializer is positional: the order of the entries below has to match
 * the member order of struct wined3d_shader_backend_ops (declared outside of
 * this file). The address of this object is also used as an identity check,
 * e.g. "device->shader_backend == &arb_program_shader_backend", to find out
 * whether the ARB backend is the active shader backend. */
const struct wined3d_shader_backend_ops arb_program_shader_backend =
{
    shader_arb_handle_instruction,
    shader_arb_precompile,
    shader_arb_select,
    shader_arb_select_compute,
    shader_arb_disable,
    shader_arb_update_float_vertex_constants,
    shader_arb_update_float_pixel_constants,
    shader_arb_load_constants,
    shader_arb_destroy,
    shader_arb_alloc,
    shader_arb_free,
    shader_arb_allocate_context_data,
    shader_arb_free_context_data,
    shader_arb_init_context_state,
    shader_arb_get_caps,
    shader_arb_color_fixup_supported,
    shader_arb_has_ffp_proj_control,
};
5676 
/* ARB_fragment_program fixed function pipeline replacement definitions.
 *
 * These are the program.env[] slots used by the fixed function replacement
 * fragment programs. The per-stage macros take a texture stage index in the
 * range 0..7; each group reserves eight consecutive slots, which is why the
 * following group starts right after slot 7 of the previous one.
 *
 * The macro argument is parenthesized so that expression arguments such as
 * "a << b" or "c ? x : y" are added as a whole instead of being torn apart
 * by operator precedence. */
#define ARB_FFP_CONST_TFACTOR           0
#define ARB_FFP_CONST_COLOR_KEY_LOW     ((ARB_FFP_CONST_TFACTOR) + 1)
#define ARB_FFP_CONST_COLOR_KEY_HIGH    ((ARB_FFP_CONST_COLOR_KEY_LOW) + 1)
#define ARB_FFP_CONST_SPECULAR_ENABLE   ((ARB_FFP_CONST_COLOR_KEY_HIGH) + 1)
#define ARB_FFP_CONST_CONSTANT(i)       ((ARB_FFP_CONST_SPECULAR_ENABLE) + 1 + (i))
#define ARB_FFP_CONST_BUMPMAT(i)        ((ARB_FFP_CONST_CONSTANT(7)) + 1 + (i))
#define ARB_FFP_CONST_LUMINANCE(i)      ((ARB_FFP_CONST_BUMPMAT(7)) + 1 + (i))
5685 
/* Descriptor of one fixed function replacement fragment program. Instances
 * are kept in the "fragment_shaders" red-black tree of struct
 * shader_arb_priv and are released by arbfp_free_ffpshader(). */
struct arbfp_ffp_desc
{
    /* Generic fixed function fragment description. Its "entry" member is the
     * tree node used to get back to this structure with
     * WINE_RB_ENTRY_VALUE(). */
    struct ffp_frag_desc parent;
    /* GL program object name, deleted with glDeleteProgramsARB(). */
    GLuint shader;
};
5691 
5692 /* Context activation is done by the caller. */
5693 static void arbfp_enable(const struct wined3d_gl_info *gl_info, BOOL enable)
5694 {
5695     if (enable)
5696     {
5697         gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB);
5698         checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
5699     }
5700     else
5701     {
5702         gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB);
5703         checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
5704     }
5705 }
5706 
5707 static void *arbfp_alloc(const struct wined3d_shader_backend_ops *shader_backend, void *shader_priv)
5708 {
5709     struct shader_arb_priv *priv;
5710 
5711     /* Share private data between the shader backend and the pipeline
5712      * replacement, if both are the arb implementation. This is needed to
5713      * figure out whether ARBfp should be disabled if no pixel shader is bound
5714      * or not. */
5715     if (shader_backend == &arb_program_shader_backend)
5716         priv = shader_priv;
5717     else if (!(priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*priv))))
5718         return NULL;
5719 
5720     wine_rb_init(&priv->fragment_shaders, wined3d_ffp_frag_program_key_compare);
5721     priv->use_arbfp_fixed_func = TRUE;
5722 
5723     return priv;
5724 }
5725 
5726 /* Context activation is done by the caller. */
5727 static void arbfp_free_ffpshader(struct wine_rb_entry *entry, void *context)
5728 {
5729     const struct wined3d_gl_info *gl_info = context;
5730     struct arbfp_ffp_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_ffp_desc, parent.entry);
5731 
5732     GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader));
5733     checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)");
5734     HeapFree(GetProcessHeap(), 0, entry_arb);
5735 }
5736 
5737 /* Context activation is done by the caller. */
5738 static void arbfp_free(struct wined3d_device *device)
5739 {
5740     struct shader_arb_priv *priv = device->fragment_priv;
5741 
5742     wine_rb_destroy(&priv->fragment_shaders, arbfp_free_ffpshader, &device->adapter->gl_info);
5743     priv->use_arbfp_fixed_func = FALSE;
5744 
5745     if (device->shader_backend != &arb_program_shader_backend)
5746     {
5747         HeapFree(GetProcessHeap(), 0, device->fragment_priv);
5748     }
5749 }
5750 
5751 static void arbfp_get_caps(const struct wined3d_gl_info *gl_info, struct fragment_caps *caps)
5752 {
5753     caps->wined3d_caps = WINED3D_FRAGMENT_CAP_PROJ_CONTROL
5754             | WINED3D_FRAGMENT_CAP_SRGB_WRITE
5755             | WINED3D_FRAGMENT_CAP_COLOR_KEY;
5756     caps->PrimitiveMiscCaps = WINED3DPMISCCAPS_TSSARGTEMP;
5757     caps->TextureOpCaps =  WINED3DTEXOPCAPS_DISABLE                     |
5758                            WINED3DTEXOPCAPS_SELECTARG1                  |
5759                            WINED3DTEXOPCAPS_SELECTARG2                  |
5760                            WINED3DTEXOPCAPS_MODULATE4X                  |
5761                            WINED3DTEXOPCAPS_MODULATE2X                  |
5762                            WINED3DTEXOPCAPS_MODULATE                    |
5763                            WINED3DTEXOPCAPS_ADDSIGNED2X                 |
5764                            WINED3DTEXOPCAPS_ADDSIGNED                   |
5765                            WINED3DTEXOPCAPS_ADD                         |
5766                            WINED3DTEXOPCAPS_SUBTRACT                    |
5767                            WINED3DTEXOPCAPS_ADDSMOOTH                   |
5768                            WINED3DTEXOPCAPS_BLENDCURRENTALPHA           |
5769                            WINED3DTEXOPCAPS_BLENDFACTORALPHA            |
5770                            WINED3DTEXOPCAPS_BLENDTEXTUREALPHA           |
5771                            WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA           |
5772                            WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM         |
5773                            WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR      |
5774                            WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA      |
5775                            WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA   |
5776                            WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR   |
5777                            WINED3DTEXOPCAPS_DOTPRODUCT3                 |
5778                            WINED3DTEXOPCAPS_MULTIPLYADD                 |
5779                            WINED3DTEXOPCAPS_LERP                        |
5780                            WINED3DTEXOPCAPS_BUMPENVMAP                  |
5781                            WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE;
5782 
5783     /* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */
5784 
5785     caps->MaxTextureBlendStages   = MAX_TEXTURES;
5786     caps->MaxSimultaneousTextures = min(gl_info->limits.samplers[WINED3D_SHADER_TYPE_PIXEL], MAX_TEXTURES);
5787 }
5788 
5789 static DWORD arbfp_get_emul_mask(const struct wined3d_gl_info *gl_info)
5790 {
5791     return GL_EXT_EMUL_ARB_MULTITEXTURE | GL_EXT_EMUL_EXT_FOG_COORD;
5792 }
5793 
5794 static void state_texfactor_arbfp(struct wined3d_context *context,
5795         const struct wined3d_state *state, DWORD state_id)
5796 {
5797     const struct wined3d_gl_info *gl_info = context->gl_info;
5798     struct wined3d_device *device = context->device;
5799     struct wined3d_color color;
5800 
5801     if (device->shader_backend == &arb_program_shader_backend)
5802     {
5803         struct shader_arb_priv *priv;
5804 
5805         /* Don't load the parameter if we're using an arbfp pixel shader,
5806          * otherwise we'll overwrite application provided constants. */
5807         if (use_ps(state))
5808             return;
5809 
5810         priv = device->shader_priv;
5811         priv->pshader_const_dirty[ARB_FFP_CONST_TFACTOR] = 1;
5812         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_TFACTOR + 1);
5813     }
5814 
5815     wined3d_color_from_d3dcolor(&color, state->render_states[WINED3D_RS_TEXTUREFACTOR]);
5816     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r));
5817     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r)");
5818 }
5819 
5820 static void state_tss_constant_arbfp(struct wined3d_context *context,
5821         const struct wined3d_state *state, DWORD state_id)
5822 {
5823     DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
5824     const struct wined3d_gl_info *gl_info = context->gl_info;
5825     struct wined3d_device *device = context->device;
5826     struct wined3d_color color;
5827 
5828     if (device->shader_backend == &arb_program_shader_backend)
5829     {
5830         struct shader_arb_priv *priv;
5831 
5832         /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite
5833          * application provided constants.
5834          */
5835         if (use_ps(state))
5836             return;
5837 
5838         priv = device->shader_priv;
5839         priv->pshader_const_dirty[ARB_FFP_CONST_CONSTANT(stage)] = 1;
5840         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_CONSTANT(stage) + 1);
5841     }
5842 
5843     wined3d_color_from_d3dcolor(&color, state->texture_states[stage][WINED3D_TSS_CONSTANT]);
5844     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_CONSTANT(stage), &color.r));
5845     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_CONSTANT(stage), &color.r)");
5846 }
5847 
5848 static void state_arb_specularenable(struct wined3d_context *context,
5849         const struct wined3d_state *state, DWORD state_id)
5850 {
5851     const struct wined3d_gl_info *gl_info = context->gl_info;
5852     struct wined3d_device *device = context->device;
5853     float col[4];
5854 
5855     if (device->shader_backend == &arb_program_shader_backend)
5856     {
5857         struct shader_arb_priv *priv;
5858 
5859         /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite
5860          * application provided constants.
5861          */
5862         if (use_ps(state))
5863             return;
5864 
5865         priv = device->shader_priv;
5866         priv->pshader_const_dirty[ARB_FFP_CONST_SPECULAR_ENABLE] = 1;
5867         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_SPECULAR_ENABLE + 1);
5868     }
5869 
5870     if (state->render_states[WINED3D_RS_SPECULARENABLE])
5871     {
5872         /* The specular color has no alpha */
5873         col[0] = 1.0f; col[1] = 1.0f;
5874         col[2] = 1.0f; col[3] = 0.0f;
5875     } else {
5876         col[0] = 0.0f; col[1] = 0.0f;
5877         col[2] = 0.0f; col[3] = 0.0f;
5878     }
5879     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col));
5880     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)");
5881 }
5882 
5883 static void set_bumpmat_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
5884 {
5885     DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
5886     const struct wined3d_gl_info *gl_info = context->gl_info;
5887     struct wined3d_device *device = context->device;
5888     float mat[2][2];
5889 
5890     context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV;
5891 
5892     if (device->shader_backend == &arb_program_shader_backend)
5893     {
5894         struct shader_arb_priv *priv = device->shader_priv;
5895 
5896         /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants. */
5897         if (use_ps(state))
5898             return;
5899 
5900         priv->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1;
5901         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1);
5902     }
5903 
5904     mat[0][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT00]);
5905     mat[0][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT01]);
5906     mat[1][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT10]);
5907     mat[1][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT11]);
5908 
5909     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0]));
5910     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])");
5911 }
5912 
5913 static void tex_bumpenvlum_arbfp(struct wined3d_context *context,
5914         const struct wined3d_state *state, DWORD state_id)
5915 {
5916     DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
5917     const struct wined3d_gl_info *gl_info = context->gl_info;
5918     struct wined3d_device *device = context->device;
5919     float param[4];
5920 
5921     context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV;
5922 
5923     if (device->shader_backend == &arb_program_shader_backend)
5924     {
5925         struct shader_arb_priv *priv = device->shader_priv;
5926 
5927         /* Exit now, don't set the luminance below, otherwise we may overwrite pixel shader constants. */
5928         if (use_ps(state))
5929             return;
5930 
5931         priv->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1;
5932         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1);
5933     }
5934 
5935     param[0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LSCALE]);
5936     param[1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LOFFSET]);
5937     param[2] = 0.0f;
5938     param[3] = 0.0f;
5939 
5940     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param));
5941     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)");
5942 }
5943 
5944 static void alpha_test_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
5945 {
5946     const struct wined3d_gl_info *gl_info = context->gl_info;
5947     int glParm;
5948     float ref;
5949 
5950     TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);
5951 
5952     if (state->render_states[WINED3D_RS_ALPHATESTENABLE])
5953     {
5954         gl_info->gl_ops.gl.p_glEnable(GL_ALPHA_TEST);
5955         checkGLcall("glEnable GL_ALPHA_TEST");
5956     }
5957     else
5958     {
5959         gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
5960         checkGLcall("glDisable GL_ALPHA_TEST");
5961         return;
5962     }
5963 
5964     ref = ((float)state->render_states[WINED3D_RS_ALPHAREF]) / 255.0f;
5965     glParm = wined3d_gl_compare_func(state->render_states[WINED3D_RS_ALPHAFUNC]);
5966 
5967     if (glParm)
5968     {
5969         gl_info->gl_ops.gl.p_glAlphaFunc(glParm, ref);
5970         checkGLcall("glAlphaFunc");
5971     }
5972 }
5973 
5974 static void color_key_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
5975 {
5976     const struct wined3d_texture *texture = state->textures[0];
5977     const struct wined3d_gl_info *gl_info = context->gl_info;
5978     struct wined3d_device *device = context->device;
5979     struct wined3d_color float_key[2];
5980 
5981     if (!texture)
5982         return;
5983 
5984     if (device->shader_backend == &arb_program_shader_backend)
5985     {
5986         struct shader_arb_priv *priv;
5987 
5988         /* Don't load the parameter if we're using an arbfp pixel shader,
5989          * otherwise we'll overwrite application provided constants. */
5990         if (use_ps(state))
5991             return;
5992 
5993         priv = device->shader_priv;
5994         priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_LOW] = 1;
5995         priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_HIGH] = 1;
5996         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_COLOR_KEY_HIGH + 1);
5997     }
5998 
5999     wined3d_format_get_float_color_key(texture->resource.format, &texture->async.src_blt_color_key, float_key);
6000 
6001     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r));
6002     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r)");
6003     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r));
6004     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r)");
6005 }
6006 
6007 static const char *get_argreg(struct wined3d_string_buffer *buffer, DWORD argnum, unsigned int stage, DWORD arg)
6008 {
6009     const char *ret;
6010 
6011     if(arg == ARG_UNUSED) return "unused"; /* This is the marker for unused registers */
6012 
6013     switch(arg & WINED3DTA_SELECTMASK) {
6014         case WINED3DTA_DIFFUSE:
6015             ret = "fragment.color.primary"; break;
6016 
6017         case WINED3DTA_CURRENT:
6018             ret = "ret";
6019             break;
6020 
6021         case WINED3DTA_TEXTURE:
6022             switch(stage) {
6023                 case 0: ret = "tex0"; break;
6024                 case 1: ret = "tex1"; break;
6025                 case 2: ret = "tex2"; break;
6026                 case 3: ret = "tex3"; break;
6027                 case 4: ret = "tex4"; break;
6028                 case 5: ret = "tex5"; break;
6029                 case 6: ret = "tex6"; break;
6030                 case 7: ret = "tex7"; break;
6031                 default: ret = "unknown texture";
6032             }
6033             break;
6034 
6035         case WINED3DTA_TFACTOR:
6036             ret = "tfactor"; break;
6037 
6038         case WINED3DTA_SPECULAR:
6039             ret = "fragment.color.secondary"; break;
6040 
6041         case WINED3DTA_TEMP:
6042             ret = "tempreg"; break;
6043 
6044         case WINED3DTA_CONSTANT:
6045             switch(stage) {
6046                 case 0: ret = "const0"; break;
6047                 case 1: ret = "const1"; break;
6048                 case 2: ret = "const2"; break;
6049                 case 3: ret = "const3"; break;
6050                 case 4: ret = "const4"; break;
6051                 case 5: ret = "const5"; break;
6052                 case 6: ret = "const6"; break;
6053                 case 7: ret = "const7"; break;
6054                 default: ret = "unknown constant";
6055             }
6056             break;
6057 
6058         default:
6059             return "unknown";
6060     }
6061 
6062     if(arg & WINED3DTA_COMPLEMENT) {
6063         shader_addline(buffer, "SUB arg%u, const.x, %s;\n", argnum, ret);
6064         if(argnum == 0) ret = "arg0";
6065         if(argnum == 1) ret = "arg1";
6066         if(argnum == 2) ret = "arg2";
6067     }
6068     if(arg & WINED3DTA_ALPHAREPLICATE) {
6069         shader_addline(buffer, "MOV arg%u, %s.w;\n", argnum, ret);
6070         if(argnum == 0) ret = "arg0";
6071         if(argnum == 1) ret = "arg1";
6072         if(argnum == 2) ret = "arg2";
6073     }
6074     return ret;
6075 }
6076 
/* Emit the fragment program instructions for one texture stage operation.
 *
 * buffer:  program text to append to.
 * stage:   texture stage the operation belongs to; selects the per-stage
 *          texture and constant registers of the arguments.
 * color:   write the colour components of the destination.
 * alpha:   write the alpha component of the destination. If both color and
 *          alpha are set the whole register is written; if neither is set
 *          the ".w" mask is used as well.
 * dst:     "tempreg" selects the tempreg register, anything else "ret".
 * op:      the WINED3D_TOP_* operation.
 * dw_arg0, dw_arg1, dw_arg2: WINED3DTA_* arguments, resolved to register
 *          names with get_argreg(), which may emit modifier instructions.
 *
 * The "const" register referenced by the emitted code is declared by the
 * program generator as {1, 2, 4, 0.5}, i.e. const.x = 1, const.y = 2,
 * const.z = 4 and const.w = 0.5. */
static void gen_ffp_instr(struct wined3d_string_buffer *buffer, unsigned int stage, BOOL color,
        BOOL alpha, DWORD dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2)
{
    const char *dstmask, *dstreg, *arg0, *arg1, *arg2;
    /* Scale factor applied to the result after the operation (1, 2 or 4). */
    unsigned int mul = 1;

    if(color && alpha) dstmask = "";
    else if(color) dstmask = ".xyz";
    else dstmask = ".w";

    if(dst == tempreg) dstreg = "tempreg";
    else dstreg = "ret";

    arg0 = get_argreg(buffer, 0, stage, dw_arg0);
    arg1 = get_argreg(buffer, 1, stage, dw_arg1);
    arg2 = get_argreg(buffer, 2, stage, dw_arg2);

    switch (op)
    {
        case WINED3D_TOP_DISABLE:
            break;

        case WINED3D_TOP_SELECT_ARG2:
            /* Reuse the SELECT_ARG1 code with arg2 as source. */
            arg1 = arg2;
            /* FALLTHROUGH */
        case WINED3D_TOP_SELECT_ARG1:
            shader_addline(buffer, "MOV %s%s, %s;\n", dstreg, dstmask, arg1);
            break;

        case WINED3D_TOP_MODULATE_4X:
            /* Doubled again by the MODULATE_2X case below, giving 4. */
            mul = 2;
            /* FALLTHROUGH */
        case WINED3D_TOP_MODULATE_2X:
            mul *= 2;
            /* FALLTHROUGH */
        case WINED3D_TOP_MODULATE:
            shader_addline(buffer, "MUL %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2);
            break;

        case WINED3D_TOP_ADD_SIGNED_2X:
            mul = 2;
            /* FALLTHROUGH */
        case WINED3D_TOP_ADD_SIGNED:
            /* Bias the second argument by -0.5 in the arg2 temporary. */
            shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2);
            arg2 = "arg2";
            /* FALLTHROUGH */
        case WINED3D_TOP_ADD:
            shader_addline(buffer, "ADD_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2);
            break;

        case WINED3D_TOP_SUBTRACT:
            shader_addline(buffer, "SUB_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2);
            break;

        case WINED3D_TOP_ADD_SMOOTH:
            /* The arg1 temporary receives 1 - arg1, which is then multiplied
             * with arg2 and added to the original arg1 register.
             * NOTE(review): if get_argreg() already returned "arg1" (i.e.
             * dw_arg1 has a complement or alpha replicate modifier), the SUB
             * overwrites the operand the MAD_SAT reads as its addend -
             * confirm whether that combination needs to be handled. */
            shader_addline(buffer, "SUB arg1, const.x, %s;\n", arg1);
            shader_addline(buffer, "MAD_SAT %s%s, arg1, %s, %s;\n", dstreg, dstmask, arg2, arg1);
            break;

        /* The BLEND_*_ALPHA operations interpolate between arg1 and arg2
         * with LRP, using the alpha of a fixed source as the weight. That
         * source replaces whatever dw_arg0 resolved to. */
        case WINED3D_TOP_BLEND_CURRENT_ALPHA:
            arg0 = get_argreg(buffer, 0, stage, WINED3DTA_CURRENT);
            shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
            break;
        case WINED3D_TOP_BLEND_FACTOR_ALPHA:
            arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TFACTOR);
            shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
            break;
        case WINED3D_TOP_BLEND_TEXTURE_ALPHA:
            arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE);
            shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
            break;
        case WINED3D_TOP_BLEND_DIFFUSE_ALPHA:
            arg0 = get_argreg(buffer, 0, stage, WINED3DTA_DIFFUSE);
            shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
            break;

        case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM:
            /* arg2 is scaled by one minus the texture alpha and added to
             * arg1; arg0.w serves as scratch for the inverted alpha. */
            arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE);
            shader_addline(buffer, "SUB arg0.w, const.x, %s.w;\n", arg0);
            shader_addline(buffer, "MAD_SAT %s%s, %s, arg0.w, %s;\n", dstreg, dstmask, arg2, arg1);
            break;

        /* D3DTOP_PREMODULATE is not implemented. */

        case WINED3D_TOP_MODULATE_INVALPHA_ADD_COLOR:
            /* arg0.w is used as scratch for one minus arg1's alpha. */
            shader_addline(buffer, "SUB arg0.w, const.x, %s;\n", arg1);
            shader_addline(buffer, "MAD_SAT %s%s, arg0.w, %s, %s;\n", dstreg, dstmask, arg2, arg1);
            break;
        case WINED3D_TOP_MODULATE_ALPHA_ADD_COLOR:
            shader_addline(buffer, "MAD_SAT %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg1);
            break;
        case WINED3D_TOP_MODULATE_INVCOLOR_ADD_ALPHA:
            /* arg0 is used as scratch for one minus arg1. */
            shader_addline(buffer, "SUB arg0, const.x, %s;\n", arg1);
            shader_addline(buffer, "MAD_SAT %s%s, arg0, %s, %s.w;\n", dstreg, dstmask, arg2, arg1);
            break;
        case WINED3D_TOP_MODULATE_COLOR_ADD_ALPHA:
            shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s.w;\n", dstreg, dstmask, arg1, arg2, arg1);
            break;

        case WINED3D_TOP_DOTPRODUCT3:
            /* Both arguments are biased by -0.5 before the dot product and
             * the result is scaled by 4 afterwards. */
            mul = 4;
            shader_addline(buffer, "SUB arg1, %s, const.w;\n", arg1);
            shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2);
            shader_addline(buffer, "DP3_SAT %s%s, arg1, arg2;\n", dstreg, dstmask);
            break;

        case WINED3D_TOP_MULTIPLY_ADD:
            shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg0);
            break;

        case WINED3D_TOP_LERP:
            /* The msdn is not quite right here */
            shader_addline(buffer, "LRP %s%s, %s, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
            break;

        case WINED3D_TOP_BUMPENVMAP:
        case WINED3D_TOP_BUMPENVMAP_LUMINANCE:
            /* Those are handled in the first pass of the shader(generation pass 1 and 2) already */
            break;

        default:
            FIXME("Unhandled texture op %08x\n", op);
    }

    /* Apply the post-scale with saturation: const.y is 2 and const.z is 4. */
    if (mul == 2)
        shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", dstreg, dstmask, dstreg);
    else if (mul == 4)
        shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", dstreg, dstmask, dstreg);
}
6206 
6207 static const char *arbfp_texture_target(enum wined3d_gl_resource_type type)
6208 {
6209     switch(type)
6210     {
6211         case WINED3D_GL_RES_TYPE_TEX_1D:
6212             return "1D";
6213         case WINED3D_GL_RES_TYPE_TEX_2D:
6214             return "2D";
6215         case WINED3D_GL_RES_TYPE_TEX_3D:
6216             return "3D";
6217         case WINED3D_GL_RES_TYPE_TEX_CUBE:
6218             return "CUBE";
6219         case WINED3D_GL_RES_TYPE_TEX_RECT:
6220             return "RECT";
6221         default:
6222             return "unexpected_resource_type";
6223     }
6224 }
6225 
6226 static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, const struct wined3d_gl_info *gl_info)
6227 {
6228     BYTE tex_read = 0, bump_used = 0, luminance_used = 0, constant_used = 0;
6229     BOOL tempreg_used = FALSE, tfactor_used = FALSE;
6230     unsigned int stage, lowest_disabled_stage;
6231     struct wined3d_string_buffer buffer;
6232     struct color_fixup_masks masks;
6233     BOOL custom_linear_fog = FALSE;
6234     const char *textype, *instr;
6235     DWORD arg0, arg1, arg2;
6236     char colorcor_dst[8];
6237     BOOL op_equal;
6238     GLuint ret;
6239 
6240     if (!string_buffer_init(&buffer))
6241     {
6242         ERR("Failed to initialize shader buffer.\n");
6243         return 0;
6244     }
6245 
6246     shader_addline(&buffer, "!!ARBfp1.0\n");
6247 
6248     if (settings->color_key_enabled)
6249     {
6250         shader_addline(&buffer, "PARAM color_key_low = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_LOW);
6251         shader_addline(&buffer, "PARAM color_key_high = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_HIGH);
6252         tex_read |= 1;
6253     }
6254 
6255     /* Find out which textures are read */
6256     for (stage = 0; stage < MAX_TEXTURES; ++stage)
6257     {
6258         if (settings->op[stage].cop == WINED3D_TOP_DISABLE)
6259             break;
6260 
6261         arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK;
6262         arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK;
6263         arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK;
6264 
6265         if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE)
6266             tex_read |= 1u << stage;
6267         if (settings->op[stage].dst == tempreg)
6268             tempreg_used = TRUE;
6269         if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP)
6270             tempreg_used = TRUE;
6271         if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR)
6272             tfactor_used = TRUE;
6273         if (arg0 == WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT)
6274             constant_used |= 1u << stage;
6275 
6276         switch (settings->op[stage].cop)
6277         {
6278             case WINED3D_TOP_BUMPENVMAP_LUMINANCE:
6279                 luminance_used |= 1u << stage;
6280                 /* fall through */
6281             case WINED3D_TOP_BUMPENVMAP:
6282                 bump_used |= 1u << stage;
6283                 /* fall through */
6284             case WINED3D_TOP_BLEND_TEXTURE_ALPHA:
6285             case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM:
6286                 tex_read |= 1u << stage;
6287                 break;
6288 
6289             case WINED3D_TOP_BLEND_FACTOR_ALPHA:
6290                 tfactor_used = TRUE;
6291                 break;
6292 
6293             default:
6294                 break;
6295         }
6296 
6297         if (settings->op[stage].aop == WINED3D_TOP_DISABLE)
6298             continue;
6299 
6300         arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK;
6301         arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK;
6302         arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK;
6303 
6304         if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE)
6305             tex_read |= 1u << stage;
6306         if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP)
6307             tempreg_used = TRUE;
6308         if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR)
6309             tfactor_used = TRUE;
6310         if (arg0 == WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT)
6311             constant_used |= 1u << stage;
6312     }
6313     lowest_disabled_stage = stage;
6314 
6315     switch (settings->fog)
6316     {
6317         case WINED3D_FFP_PS_FOG_OFF:                                                         break;
6318         case WINED3D_FFP_PS_FOG_LINEAR:
6319             if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG)
6320             {
6321                 custom_linear_fog = TRUE;
6322                 break;
6323             }
6324             shader_addline(&buffer, "OPTION ARB_fog_linear;\n");
6325             break;
6326 
6327         case WINED3D_FFP_PS_FOG_EXP:    shader_addline(&buffer, "OPTION ARB_fog_exp;\n");    break;
6328         case WINED3D_FFP_PS_FOG_EXP2:   shader_addline(&buffer, "OPTION ARB_fog_exp2;\n");   break;
6329         default: FIXME("Unexpected fog setting %d\n", settings->fog);
6330     }
6331 
6332     shader_addline(&buffer, "PARAM const = {1, 2, 4, 0.5};\n");
6333     shader_addline(&buffer, "TEMP TMP;\n");
6334     shader_addline(&buffer, "TEMP ret;\n");
6335     if (tempreg_used || settings->sRGB_write) shader_addline(&buffer, "TEMP tempreg;\n");
6336     shader_addline(&buffer, "TEMP arg0;\n");
6337     shader_addline(&buffer, "TEMP arg1;\n");
6338     shader_addline(&buffer, "TEMP arg2;\n");
6339     for (stage = 0; stage < MAX_TEXTURES; ++stage)
6340     {
6341         if (constant_used & (1u << stage))
6342             shader_addline(&buffer, "PARAM const%u = program.env[%u];\n", stage, ARB_FFP_CONST_CONSTANT(stage));
6343 
6344         if (!(tex_read & (1u << stage)))
6345             continue;
6346 
6347         shader_addline(&buffer, "TEMP tex%u;\n", stage);
6348 
6349         if (!(bump_used & (1u << stage)))
6350             continue;
6351         shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage));
6352 
6353         if (!(luminance_used & (1u << stage)))
6354             continue;
6355         shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage));
6356     }
6357     if (tfactor_used)
6358         shader_addline(&buffer, "PARAM tfactor = program.env[%u];\n", ARB_FFP_CONST_TFACTOR);
6359     shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", ARB_FFP_CONST_SPECULAR_ENABLE);
6360 
6361     if (settings->sRGB_write)
6362     {
6363         shader_addline(&buffer, "PARAM srgb_consts0 = ");
6364         shader_arb_append_imm_vec4(&buffer, wined3d_srgb_const0);
6365         shader_addline(&buffer, ";\n");
6366         shader_addline(&buffer, "PARAM srgb_consts1 = ");
6367         shader_arb_append_imm_vec4(&buffer, wined3d_srgb_const1);
6368         shader_addline(&buffer, ";\n");
6369     }
6370 
6371     if (lowest_disabled_stage < 7 && settings->emul_clipplanes)
6372         shader_addline(&buffer, "KIL fragment.texcoord[7];\n");
6373 
6374     if (tempreg_used || settings->sRGB_write)
6375         shader_addline(&buffer, "MOV tempreg, 0.0;\n");
6376 
6377     /* Generate texture sampling instructions */
6378     for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage)
6379     {
6380         if (!(tex_read & (1u << stage)))
6381             continue;
6382 
6383         textype = arbfp_texture_target(settings->op[stage].tex_type);
6384 
6385         if(settings->op[stage].projected == proj_none) {
6386             instr = "TEX";
6387         } else if(settings->op[stage].projected == proj_count4 ||
6388                   settings->op[stage].projected == proj_count3) {
6389             instr = "TXP";
6390         } else {
6391             FIXME("Unexpected projection mode %d\n", settings->op[stage].projected);
6392             instr = "TXP";
6393         }
6394 
6395         if (stage > 0
6396                 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP
6397                 || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE))
6398         {
6399             shader_addline(&buffer, "SWZ arg1, bumpmat%u, x, z, 0, 0;\n", stage - 1);
6400             shader_addline(&buffer, "DP3 ret.x, arg1, tex%u;\n", stage - 1);
6401             shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1);
6402             shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1);
6403 
6404             /* with projective textures, texbem only divides the static texture coord, not the displacement,
6405              * so multiply the displacement with the dividing parameter before passing it to TXP
6406              */
6407             if (settings->op[stage].projected != proj_none) {
6408                 if(settings->op[stage].projected == proj_count4) {
6409                     shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage);
6410                     shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", stage, stage);
6411                 } else {
6412                     shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage);
6413                     shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", stage, stage);
6414                 }
6415             } else {
6416                 shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage);
6417             }
6418 
6419             shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n",
6420                     instr, stage, stage, textype);
6421             if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)
6422             {
6423                 shader_addline(&buffer, "MAD_SAT ret.x, tex%u.z, luminance%u.x, luminance%u.y;\n",
6424                                stage - 1, stage - 1, stage - 1);
6425                 shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage);
6426             }
6427         } else if(settings->op[stage].projected == proj_count3) {
6428             shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage);
6429             shader_addline(&buffer, "MOV ret.w, ret.z;\n");
6430             shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n",
6431                             instr, stage, stage, textype);
6432         } else {
6433             shader_addline(&buffer, "%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n",
6434                             instr, stage, stage, stage, textype);
6435         }
6436 
6437         sprintf(colorcor_dst, "tex%u", stage);
6438         masks = calc_color_correction(settings->op[stage].color_fixup, WINED3DSP_WRITEMASK_ALL);
6439         gen_color_correction(&buffer, colorcor_dst, colorcor_dst, "const.x", "const.y",
6440                 settings->op[stage].color_fixup, masks);
6441     }
6442 
6443     if (settings->color_key_enabled)
6444     {
6445         shader_addline(&buffer, "SLT TMP, tex0, color_key_low;\n"); /* below low key */
6446         shader_addline(&buffer, "SGE ret, tex0, color_key_high;\n"); /* above high key */
6447         shader_addline(&buffer, "ADD TMP, TMP, ret;\n"); /* or */
6448         shader_addline(&buffer, "DP4 TMP.b, TMP, TMP;\n"); /* on any channel */
6449         shader_addline(&buffer, "SGE TMP, -TMP.b, 0.0;\n"); /* logical not */
6450         shader_addline(&buffer, "KIL -TMP;\n"); /* discard if true */
6451     }
6452 
6453     shader_addline(&buffer, "MOV ret, fragment.color.primary;\n");
6454 
6455     /* Generate the main shader */
6456     for (stage = 0; stage < MAX_TEXTURES; ++stage)
6457     {
6458         if (settings->op[stage].cop == WINED3D_TOP_DISABLE)
6459             break;
6460 
6461         if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1
6462                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1)
6463             op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1;
6464         else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1
6465                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2)
6466             op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2;
6467         else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2
6468                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1)
6469             op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1;
6470         else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2
6471                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2)
6472             op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2;
6473         else
6474             op_equal = settings->op[stage].aop   == settings->op[stage].cop
6475                     && settings->op[stage].carg0 == settings->op[stage].aarg0
6476                     && settings->op[stage].carg1 == settings->op[stage].aarg1
6477                     && settings->op[stage].carg2 == settings->op[stage].aarg2;
6478 
6479         if (settings->op[stage].aop == WINED3D_TOP_DISABLE)
6480         {
6481             gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst,
6482                           settings->op[stage].cop, settings->op[stage].carg0,
6483                           settings->op[stage].carg1, settings->op[stage].carg2);
6484         }
6485         else if (op_equal)
6486         {
6487             gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].dst,
6488                           settings->op[stage].cop, settings->op[stage].carg0,
6489                           settings->op[stage].carg1, settings->op[stage].carg2);
6490         }
6491         else if (settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP
6492                 && settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP_LUMINANCE)
6493         {
6494             gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst,
6495                           settings->op[stage].cop, settings->op[stage].carg0,
6496                           settings->op[stage].carg1, settings->op[stage].carg2);
6497             gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].dst,
6498                           settings->op[stage].aop, settings->op[stage].aarg0,
6499                           settings->op[stage].aarg1, settings->op[stage].aarg2);
6500         }
6501     }
6502 
6503     if (settings->sRGB_write || custom_linear_fog)
6504     {
6505         shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, ret;\n");
6506         if (settings->sRGB_write)
6507             arbfp_add_sRGB_correction(&buffer, "ret", "arg0", "arg1", "arg2", "tempreg", FALSE);
6508         if (custom_linear_fog)
6509             arbfp_add_linear_fog(&buffer, "ret", "arg0");
6510         shader_addline(&buffer, "MOV result.color, ret;\n");
6511     }
6512     else
6513     {
6514         shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, ret;\n");
6515     }
6516 
6517     /* Footer */
6518     shader_addline(&buffer, "END\n");
6519 
6520     /* Generate the shader */
6521     GL_EXTCALL(glGenProgramsARB(1, &ret));
6522     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ret));
6523     shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);
6524 
6525     string_buffer_free(&buffer);
6526     return ret;
6527 }
6528 
6529 static void fragment_prog_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
6530 {
6531     const struct wined3d_gl_info *gl_info = context->gl_info;
6532     const struct wined3d_device *device = context->device;
6533     struct shader_arb_priv *priv = device->fragment_priv;
6534     BOOL use_pshader = use_ps(state);
6535     struct ffp_frag_settings settings;
6536     const struct arbfp_ffp_desc *desc;
6537     unsigned int i;
6538 
6539     TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);
6540 
6541     if (isStateDirty(context, STATE_RENDER(WINED3D_RS_FOGENABLE)))
6542     {
6543         if (!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader)
6544         {
6545             /* Reload fixed function constants since they collide with the
6546              * pixel shader constants. */
6547             for (i = 0; i < MAX_TEXTURES; ++i)
6548             {
6549                 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00));
6550                 state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT));
6551             }
6552             state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR));
6553             state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE));
6554             color_key_arbfp(context, state, STATE_COLOR_KEY);
6555         }
6556         else if (use_pshader)
6557         {
6558             context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL;
6559         }
6560         return;
6561     }
6562 
6563     if (!use_pshader)
6564     {
6565         /* Find or create a shader implementing the fixed function pipeline
6566          * settings, then activate it. */
6567         gen_ffp_frag_op(context, state, &settings, FALSE);
6568         desc = (const struct arbfp_ffp_desc *)find_ffp_frag_shader(&priv->fragment_shaders, &settings);
6569         if (!desc)
6570         {
6571             struct arbfp_ffp_desc *new_desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*new_desc));
6572             if (!new_desc)
6573             {
6574                 ERR("Out of memory\n");
6575                 return;
6576             }
6577 
6578             new_desc->parent.settings = settings;
6579             new_desc->shader = gen_arbfp_ffp_shader(&settings, gl_info);
6580             add_ffp_frag_shader(&priv->fragment_shaders, &new_desc->parent);
6581             TRACE("Allocated fixed function replacement shader descriptor %p\n", new_desc);
6582             desc = new_desc;
6583         }
6584 
6585         /* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active (however, note the
6586          * comment above the shader_select call below). If e.g. GLSL is active, the shader_select call will
6587          * deactivate it.
6588          */
6589         GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader));
6590         checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)");
6591         priv->current_fprogram_id = desc->shader;
6592 
6593         if (device->shader_backend == &arb_program_shader_backend && context->last_was_pshader)
6594         {
6595             /* Reload fixed function constants since they collide with the
6596              * pixel shader constants. */
6597             for (i = 0; i < MAX_TEXTURES; ++i)
6598             {
6599                 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00));
6600                 state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT));
6601             }
6602             state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR));
6603             state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE));
6604             color_key_arbfp(context, state, STATE_COLOR_KEY);
6605         }
6606         context->last_was_pshader = FALSE;
6607     }
6608     else if (!context->last_was_pshader)
6609     {
6610         if (device->shader_backend == &arb_program_shader_backend)
6611             context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F;
6612         context->last_was_pshader = TRUE;
6613     }
6614 
6615     context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL;
6616 }
6617 
6618 /* We can't link the fog states to the fragment state directly since the
6619  * vertex pipeline links them to FOGENABLE. A different linking in different
6620  * pipeline parts can't be expressed in the combined state table, so we need
6621  * to handle that with a forwarding function. The other invisible side effect
6622  * is that changing the fog start and fog end (which links to FOGENABLE in
6623  * vertex) results in the fragment_prog_arbfp function being called because
6624  * FOGENABLE is dirty, which calls this function here. */
6625 static void state_arbfp_fog(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
6626 {
6627     enum fogsource new_source;
6628     DWORD fogstart = state->render_states[WINED3D_RS_FOGSTART];
6629     DWORD fogend = state->render_states[WINED3D_RS_FOGEND];
6630 
6631     TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);
6632 
6633     if (!isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL)))
6634         fragment_prog_arbfp(context, state, state_id);
6635 
6636     if (!state->render_states[WINED3D_RS_FOGENABLE])
6637         return;
6638 
6639     if (state->render_states[WINED3D_RS_FOGTABLEMODE] == WINED3D_FOG_NONE)
6640     {
6641         if (use_vs(state))
6642         {
6643             new_source = FOGSOURCE_VS;
6644         }
6645         else
6646         {
6647             if (state->render_states[WINED3D_RS_FOGVERTEXMODE] == WINED3D_FOG_NONE || context->last_was_rhw)
6648                 new_source = FOGSOURCE_COORD;
6649             else
6650                 new_source = FOGSOURCE_FFP;
6651         }
6652     }
6653     else
6654     {
6655         new_source = FOGSOURCE_FFP;
6656     }
6657 
6658     if (new_source != context->fog_source || fogstart == fogend)
6659     {
6660         context->fog_source = new_source;
6661         state_fogstartend(context, state, STATE_RENDER(WINED3D_RS_FOGSTART));
6662     }
6663 }
6664 
6665 static void textransform(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
6666 {
6667     if (!isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL)))
6668         fragment_prog_arbfp(context, state, state_id);
6669 }
6670 
6671 static const struct StateEntryTemplate arbfp_fragmentstate_template[] =
6672 {
6673     {STATE_RENDER(WINED3D_RS_TEXTUREFACTOR),              { STATE_RENDER(WINED3D_RS_TEXTUREFACTOR),             state_texfactor_arbfp   }, WINED3D_GL_EXT_NONE             },
6674     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6675     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6676     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6677     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6678     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6679     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6680     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6681     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6682     {STATE_TEXTURESTAGE(0, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6683     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
6684     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6685     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6686     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6687     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
6688     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6689     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6690     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6691     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6692     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6693     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6694     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6695     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6696     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6697     {STATE_TEXTURESTAGE(1, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6698     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
6699     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6700     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6701     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6702     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
6703     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6704     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6705     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6706     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6707     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6708     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6709     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6710     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6711     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6712     {STATE_TEXTURESTAGE(2, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6713     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
6714     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6715     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6716     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6717     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
6718     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6719     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6720     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6721     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6722     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6723     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6724     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6725     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6726     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6727     {STATE_TEXTURESTAGE(3, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6728     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
6729     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6730     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6731     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6732     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
6733     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6734     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6735     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6736     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6737     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6738     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6739     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6740     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6741     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6742     {STATE_TEXTURESTAGE(4, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6743     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
6744     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6745     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6746     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6747     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
6748     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6749     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6750     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6751     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6752     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6753     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6754     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6755     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6756     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6757     {STATE_TEXTURESTAGE(5, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6758     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
6759     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6760     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6761     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6762     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
6763     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6764     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6765     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6766     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6767     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6768     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6769     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6770     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6771     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6772     {STATE_TEXTURESTAGE(6, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6773     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
6774     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6775     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6776     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6777     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
6778     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6779     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6780     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6781     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6782     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6783     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6784     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6785     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6786     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6787     {STATE_TEXTURESTAGE(7, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6788     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
6789     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6790     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6791     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
6792     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
6793     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6794     {STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),             { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            fragment_prog_arbfp     }, WINED3D_GL_EXT_NONE             },
6795     {STATE_RENDER(WINED3D_RS_ALPHAFUNC),                  { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE),           NULL                    }, WINED3D_GL_EXT_NONE             },
6796     {STATE_RENDER(WINED3D_RS_ALPHAREF),                   { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE),           NULL                    }, WINED3D_GL_EXT_NONE             },
6797     {STATE_RENDER(WINED3D_RS_ALPHATESTENABLE),            { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE),           alpha_test_arbfp        }, WINED3D_GL_EXT_NONE             },
6798     {STATE_RENDER(WINED3D_RS_COLORKEYENABLE),             { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6799     {STATE_COLOR_KEY,                                     { STATE_COLOR_KEY,                                    color_key_arbfp         }, WINED3D_GL_EXT_NONE             },
6800     {STATE_RENDER(WINED3D_RS_FOGENABLE),                  { STATE_RENDER(WINED3D_RS_FOGENABLE),                 state_arbfp_fog         }, WINED3D_GL_EXT_NONE             },
6801     {STATE_RENDER(WINED3D_RS_FOGTABLEMODE),               { STATE_RENDER(WINED3D_RS_FOGENABLE),                 NULL                    }, WINED3D_GL_EXT_NONE             },
6802     {STATE_RENDER(WINED3D_RS_FOGVERTEXMODE),              { STATE_RENDER(WINED3D_RS_FOGENABLE),                 NULL                    }, WINED3D_GL_EXT_NONE             },
6803     {STATE_RENDER(WINED3D_RS_FOGSTART),                   { STATE_RENDER(WINED3D_RS_FOGSTART),                  state_fogstartend       }, WINED3D_GL_EXT_NONE             },
6804     {STATE_RENDER(WINED3D_RS_FOGEND),                     { STATE_RENDER(WINED3D_RS_FOGSTART),                  NULL                    }, WINED3D_GL_EXT_NONE             },
6805     {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),            { STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),           state_srgbwrite         }, ARB_FRAMEBUFFER_SRGB            },
6806     {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),            { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6807     {STATE_RENDER(WINED3D_RS_FOGCOLOR),                   { STATE_RENDER(WINED3D_RS_FOGCOLOR),                  state_fogcolor          }, WINED3D_GL_EXT_NONE             },
6808     {STATE_RENDER(WINED3D_RS_FOGDENSITY),                 { STATE_RENDER(WINED3D_RS_FOGDENSITY),                state_fogdensity        }, WINED3D_GL_EXT_NONE             },
6809     {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
6810     {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
6811     {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
6812     {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
6813     {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
6814     {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
6815     {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
6816     {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
6817     {STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
6818     {STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
6819     {STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
6820     {STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
6821     {STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
6822     {STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
6823     {STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
6824     {STATE_TEXTURESTAGE(7, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(7, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
6825     {STATE_RENDER(WINED3D_RS_SPECULARENABLE),             { STATE_RENDER(WINED3D_RS_SPECULARENABLE),            state_arb_specularenable}, WINED3D_GL_EXT_NONE             },
6826     {STATE_RENDER(WINED3D_RS_SHADEMODE),                  { STATE_RENDER(WINED3D_RS_SHADEMODE),                 state_shademode         }, WINED3D_GL_EXT_NONE             },
6827     {0 /* Terminate */,                                   { 0,                                                  0                       }, WINED3D_GL_EXT_NONE             },
6828 };
6829 
/* Per-context allocation hook installed in arbfp_fragment_pipeline. This
 * implementation allocates nothing and unconditionally reports success. */
static BOOL arbfp_alloc_context_data(struct wined3d_context *context)
{
    return TRUE;
}
6834 
/* Per-context release hook installed in arbfp_fragment_pipeline. Nothing is
 * allocated by arbfp_alloc_context_data(), so there is nothing to free. */
static void arbfp_free_context_data(struct wined3d_context *context)
{
}
6838 
/* Fragment pipeline description for the ARB fragment program backend.
 *
 * The initializer is positional: every entry has to stay in the member order
 * of struct fragment_pipeline, which is declared outside this part of the
 * file. Do not reorder entries without checking that declaration. */
const struct fragment_pipeline arbfp_fragment_pipeline = {
    arbfp_enable,
    arbfp_get_caps,
    arbfp_get_emul_mask,
    arbfp_alloc,
    arbfp_free,
    arbfp_alloc_context_data,           /* Stub: always returns TRUE. */
    arbfp_free_context_data,            /* Stub: does nothing. */
    shader_arb_color_fixup_supported,
    arbfp_fragmentstate_template,       /* State table that ends right above. */
};
6850 
/* Key describing one blit shader variant.
 *
 * The bit-field widths add up to 32 (4 + 3 + 1 + 24). Keys are compared
 * bytewise with memcmp() in arbfp_blit_type_compare(), so the explicit
 * "padding" member gives the otherwise unused bits a name that can be set to
 * a known value.
 * NOTE(review): this presumably relies on the users of the key zeroing the
 * whole structure before filling it in - confirm at the call sites. */
struct arbfp_blit_type
{
    enum complex_fixup fixup : 4;                   /* Source format conversion, e.g. COMPLEX_FIXUP_UYVY. */
    enum wined3d_gl_resource_type res_type : 3;     /* Texture kind, e.g. WINED3D_GL_RES_TYPE_TEX_2D or _TEX_RECT. */
    DWORD use_color_key : 1;                        /* Non-zero if the variant uses color keying. */
    DWORD padding : 24;                             /* Remaining bits of the 32 bit key. */
};
6858 
/* One cached blit program together with the key it belongs to. Instances are
 * heap allocated; arbfp_free_blit_shader() deletes the program and frees the
 * structure. */
struct arbfp_blit_desc
{
    GLuint shader;                  /* ARB program name, released with glDeleteProgramsARB(). */
    struct arbfp_blit_type type;    /* Lookup key, compared by arbfp_blit_type_compare(). */
    struct wine_rb_entry entry;     /* Tree linkage; WINE_RB_ENTRY_VALUE() maps it back to the descriptor. */
};
6865 
/* Numeric slots for the values passed to the blit programs.
 * NOTE(review): presumably these are fragment program local parameter
 * indices (texture size, low and high color key bounds); the code that uses
 * them is outside this part of the file - confirm there. */
#define ARBFP_BLIT_PARAM_SIZE 0
#define ARBFP_BLIT_PARAM_COLOR_KEY_LOW 1
#define ARBFP_BLIT_PARAM_COLOR_KEY_HIGH 2
6869 
/* State of the ARB fragment program blitter. Torn down by
 * arbfp_blitter_destroy(). */
struct wined3d_arbfp_blitter
{
    struct wined3d_blitter blitter; /* Embedded generic blitter; CONTAINING_RECORD() recovers the outer structure. */
    struct wine_rb_tree shaders;    /* Tree of struct arbfp_blit_desc, ordered by arbfp_blit_type_compare(). */
    GLuint palette_texture;         /* GL texture name; only deleted on destruction when non-zero. */
};
6876 
6877 static int arbfp_blit_type_compare(const void *key, const struct wine_rb_entry *entry)
6878 {
6879     const struct arbfp_blit_type *ka = key;
6880     const struct arbfp_blit_type *kb = &WINE_RB_ENTRY_VALUE(entry, const struct arbfp_blit_desc, entry)->type;
6881 
6882     return memcmp(ka, kb, sizeof(*ka));
6883 }
6884 
6885 /* Context activation is done by the caller. */
6886 static void arbfp_free_blit_shader(struct wine_rb_entry *entry, void *ctx)
6887 {
6888     struct arbfp_blit_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry);
6889     const struct wined3d_gl_info *gl_info;
6890     struct wined3d_context *context;
6891 
6892     context = ctx;
6893     gl_info = context->gl_info;
6894 
6895     GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader));
6896     checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)");
6897     HeapFree(GetProcessHeap(), 0, entry_arb);
6898 }
6899 
6900 /* Context activation is done by the caller. */
6901 static void arbfp_blitter_destroy(struct wined3d_blitter *blitter, struct wined3d_context *context)
6902 {
6903     const struct wined3d_gl_info *gl_info = context->gl_info;
6904     struct wined3d_arbfp_blitter *arbfp_blitter;
6905     struct wined3d_blitter *next;
6906 
6907     if ((next = blitter->next))
6908         next->ops->blitter_destroy(next, context);
6909 
6910     arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter);
6911 
6912     wine_rb_destroy(&arbfp_blitter->shaders, arbfp_free_blit_shader, context);
6913     checkGLcall("Delete blit programs");
6914 
6915     if (arbfp_blitter->palette_texture)
6916         gl_info->gl_ops.gl.p_glDeleteTextures(1, &arbfp_blitter->palette_texture);
6917 
6918     HeapFree(GetProcessHeap(), 0, arbfp_blitter);
6919 }
6920 
6921 static BOOL gen_planar_yuv_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type,
6922         char *luminance)
6923 {
6924     char chroma;
6925     const char *tex, *texinstr = "TXP";
6926 
6927     if (type->fixup == COMPLEX_FIXUP_UYVY)
6928     {
6929         chroma = 'x';
6930         *luminance = 'w';
6931     }
6932     else
6933     {
6934         chroma = 'w';
6935         *luminance = 'x';
6936     }
6937 
6938     tex = arbfp_texture_target(type->res_type);
6939     if (type->res_type == WINED3D_GL_RES_TYPE_TEX_RECT)
6940         texinstr = "TEX";
6941 
6942     /* First we have to read the chroma values. This means we need at least two pixels(no filtering),
6943      * or 4 pixels(with filtering). To get the unmodified chromas, we have to rid ourselves of the
6944      * filtering when we sample the texture.
6945      *
6946      * These are the rules for reading the chroma:
6947      *
6948      * Even pixel: Cr
6949      * Even pixel: U
6950      * Odd pixel: V
6951      *
6952      * So we have to get the sampling x position in non-normalized coordinates in integers
6953      */
6954     if (type->res_type != WINED3D_GL_RES_TYPE_TEX_RECT)
6955     {
6956         shader_addline(buffer, "MUL texcrd.xy, fragment.texcoord[0], size.x;\n");
6957         shader_addline(buffer, "MOV texcrd.w, size.x;\n");
6958     }
6959     else
6960     {
6961         shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
6962     }
6963     /* We must not allow filtering between pixel x and x+1, this would mix U and V
6964      * Vertical filtering is ok. However, bear in mind that the pixel center is at
6965      * 0.5, so add 0.5.
6966      */
6967     shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
6968     shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n");
6969 
6970     /* Divide the x coordinate by 0.5 and get the fraction. This gives 0.25 and 0.75 for the
6971      * even and odd pixels respectively
6972      */
6973     shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n");
6974     shader_addline(buffer, "FRC texcrd2, texcrd2;\n");
6975 
6976     /* Sample Pixel 1 */
6977     shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);
6978 
6979     /* Put the value into either of the chroma values */
6980     shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
6981     shader_addline(buffer, "MUL chroma.x, luminance.%c, temp.x;\n", chroma);
6982     shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
6983     shader_addline(buffer, "MUL chroma.y, luminance.%c, temp.x;\n", chroma);
6984 
6985     /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample
6986      * the pixel right to the current one. Otherwise, sample the left pixel.
6987      * Bias and scale the SLT result to -1;1 and add it to the texcrd.x.
6988      */
6989     shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n");
6990     shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n");
6991     shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);
6992 
6993     /* Put the value into the other chroma */
6994     shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
6995     shader_addline(buffer, "MAD chroma.y, luminance.%c, temp.x, chroma.y;\n", chroma);
6996     shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
6997     shader_addline(buffer, "MAD chroma.x, luminance.%c, temp.x, chroma.x;\n", chroma);
6998 
6999     /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of
7000      * the current one and lerp the two U and V values
7001      */
7002 
7003     /* This gives the correctly filtered luminance value */
7004     shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex);
7005 
7006     return TRUE;
7007 }
7008 
7009 static BOOL gen_yv12_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type,
7010         char *luminance)
7011 {
7012     const char *tex;
7013     static const float yv12_coef[]
7014             = {2.0f / 3.0f, 1.0f / 6.0f, (2.0f / 3.0f) + (1.0f / 6.0f), 1.0f / 3.0f};
7015 
7016     tex = arbfp_texture_target(type->res_type);
7017 
7018     /* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2)
7019      * V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective
7020      * bitdepth is 12 bits per pixel. Since the U and V planes have only half the
7021      * pitch of the luminance plane, the packing into the gl texture is a bit
7022      * unfortunate. If the whole texture is interpreted as luminance data it looks
7023      * approximately like this:
7024      *
7025      *        +----------------------------------+----
7026      *        |                                  |
7027      *        |                                  |
7028      *        |                                  |
7029      *        |                                  |
7030      *        |                                  |   2
7031      *        |            LUMINANCE             |   -
7032      *        |                                  |   3
7033      *        |                                  |
7034      *        |                                  |
7035      *        |                                  |
7036      *        |                                  |
7037      *        +----------------+-----------------+----
7038      *        |                |                 |
7039      *        |  V even rows   |  V odd rows     |
7040      *        |                |                 |   1
7041      *        +----------------+------------------   -
7042      *        |                |                 |   3
7043      *        |  U even rows   |  U odd rows     |
7044      *        |                |                 |
7045      *        +----------------+-----------------+----
7046      *        |                |                 |
7047      *        |     0.5        |       0.5       |
7048      *
7049      * So it appears as if there are 4 chroma images, but in fact the odd rows
7050      * in the chroma images are in the same row as the even ones. So it is
7051      * kinda tricky to read
7052      *
7053      * When reading from rectangle textures, keep in mind that the input y coordinates
7054      * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height
7055      */
7056     shader_addline(buffer, "PARAM yv12_coef = ");
7057     shader_arb_append_imm_vec4(buffer, yv12_coef);
7058     shader_addline(buffer, ";\n");
7059 
7060     shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
7061     /* the chroma planes have only half the width */
7062     shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n");
7063 
7064     /* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias
7065      * the coordinate. Also read the right side of the image when reading odd lines
7066      *
7067      * Don't forget to clamp the y values in into the range, otherwise we'll get filtering
7068      * bleeding
7069      */
7070     if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7071     {
7072 
7073         shader_addline(buffer, "RCP chroma.w, size.y;\n");
7074 
7075         shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n");
7076 
7077         shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n");
7078         shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n");
7079 
7080         /* Read odd lines from the right side(add size * 0.5 to the x coordinate */
7081         shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
7082         shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
7083         shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
7084         shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n");
7085 
7086         /* clamp, keep the half pixel origin in mind */
7087         shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, yv12_coef.x;\n");
7088         shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
7089         shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.z;\n");
7090         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
7091     }
7092     else
7093     {
7094         /* Read from [size - size+size/4] */
7095         shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");
7096         shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n");
7097 
7098         /* Read odd lines from the right side(add size * 0.5 to the x coordinate */
7099         shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
7100         shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
7101         shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
7102         shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n");
7103         shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n");
7104 
7105         /* Make sure to read exactly from the pixel center */
7106         shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");
7107         shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n");
7108 
7109         /* Clamp */
7110         shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n");
7111         shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n");
7112         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
7113         shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n");
7114         shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
7115     }
7116     /* Read the texture, put the result into the output register */
7117     shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
7118     shader_addline(buffer, "MOV chroma.x, temp.w;\n");
7119 
7120     /* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th
7121      * No need to clamp because we're just reusing the already clamped value from above
7122      */
7123     if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7124         shader_addline(buffer, "ADD texcrd.y, texcrd.y, yv12_coef.y;\n");
7125     else
7126         shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, texcrd.y;\n");
7127     shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
7128     shader_addline(buffer, "MOV chroma.y, temp.w;\n");
7129 
7130     /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate.
7131      * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance
7132      * values due to filtering
7133      */
7134     shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
7135     if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7136     {
7137         /* Multiply the y coordinate by 2/3 and clamp it */
7138         shader_addline(buffer, "MUL texcrd.y, texcrd.y, yv12_coef.x;\n");
7139         shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.x;\n");
7140         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
7141         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
7142     }
7143     else
7144     {
7145         /* Reading from texture_rectangles is pretty straightforward, just use the unmodified
7146          * texture coordinate. It is still a good idea to clamp it though, since the opengl texture
7147          * is bigger
7148          */
7149         shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n");
7150         shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n");
7151         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
7152     }
7153     *luminance = 'a';
7154 
7155     return TRUE;
7156 }
7157 
7158 static BOOL gen_nv12_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type,
7159         char *luminance)
7160 {
7161     const char *tex;
7162     static const float nv12_coef[]
7163             = {2.0f / 3.0f, 1.0f / 3.0f, 1.0f, 1.0f};
7164 
7165     tex = arbfp_texture_target(type->res_type);
7166 
7167     /* NV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2)
7168      * sized plane where each component is an UV pair. So the effective
7169      * bitdepth is 12 bits per pixel If the whole texture is interpreted as luminance
7170      * data it looks approximately like this:
7171      *
7172      *        +----------------------------------+----
7173      *        |                                  |
7174      *        |                                  |
7175      *        |                                  |
7176      *        |                                  |
7177      *        |                                  |   2
7178      *        |            LUMINANCE             |   -
7179      *        |                                  |   3
7180      *        |                                  |
7181      *        |                                  |
7182      *        |                                  |
7183      *        |                                  |
7184      *        +----------------------------------+----
7185      *        |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
7186      *        |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
7187      *        |                                  |   1
7188      *        |                                  |   -
7189      *        |                                  |   3
7190      *        |                                  |
7191      *        |                                  |
7192      *        +----------------------------------+----
7193      *
7194      * When reading from rectangle textures, keep in mind that the input y coordinates
7195      * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height. */
7196 
7197     shader_addline(buffer, "PARAM nv12_coef = ");
7198     shader_arb_append_imm_vec4(buffer, nv12_coef);
7199     shader_addline(buffer, ";\n");
7200 
7201     shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
7202     /* We only have half the number of chroma pixels. */
7203     shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n");
7204 
7205     if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7206     {
7207         shader_addline(buffer, "RCP chroma.w, size.x;\n");
7208         shader_addline(buffer, "RCP chroma.z, size.y;\n");
7209 
7210         shader_addline(buffer, "MAD texcrd.y, texcrd.y, nv12_coef.y, nv12_coef.x;\n");
7211 
7212         /* We must not allow filtering horizontally, this would mix U and V.
7213          * Vertical filtering is ok. However, bear in mind that the pixel center is at
7214          * 0.5, so add 0.5. */
7215 
7216         /* Convert to non-normalized coordinates so we can find the
7217          * individual pixel. */
7218         shader_addline(buffer, "MUL texcrd.x, texcrd.x, size.x;\n");
7219         shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
7220         /* Multiply by 2 since chroma components are stored in UV pixel pairs,
7221          * add 0.5 to hit the center of the pixel. */
7222         shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n");
7223 
7224         /* Convert back to normalized coordinates. */
7225         shader_addline(buffer, "MUL texcrd.x, texcrd.x, chroma.w;\n");
7226 
7227         /* Clamp, keep the half pixel origin in mind. */
7228         shader_addline(buffer, "MAD temp.y, coef.y, chroma.z, nv12_coef.x;\n");
7229         shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
7230         shader_addline(buffer, "MAD temp.y, -coef.y, chroma.z, nv12_coef.z;\n");
7231         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
7232     }
7233     else
7234     {
7235         /* Read from [size - size+size/2] */
7236         shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.y, size.y;\n");
7237 
7238         shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
7239         /* Multiply by 2 since chroma components are stored in UV pixel pairs,
7240          * add 0.5 to hit the center of the pixel. */
7241         shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n");
7242 
7243         /* Clamp */
7244         shader_addline(buffer, "MAD temp.y, size.y, coef.y, size.y;\n");
7245         shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n");
7246         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
7247         shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n");
7248         shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
7249     }
7250     /* Read the texture, put the result into the output register. */
7251     shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
7252     shader_addline(buffer, "MOV chroma.y, temp.w;\n");
7253 
7254     if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7255     {
7256         /* Add 1/size.x */
7257         shader_addline(buffer, "ADD texcrd.x, texcrd.x, chroma.w;\n");
7258     }
7259     else
7260     {
7261         /* Add 1 */
7262         shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.x;\n");
7263     }
7264     shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
7265     shader_addline(buffer, "MOV chroma.x, temp.w;\n");
7266 
7267     /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate.
7268      * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance
7269      * values due to filtering. */
7270     shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
7271     if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7272     {
7273         /* Multiply the y coordinate by 2/3 and clamp it */
7274         shader_addline(buffer, "MUL texcrd.y, texcrd.y, nv12_coef.x;\n");
7275         shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, nv12_coef.x;\n");
7276         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
7277         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
7278     }
7279     else
7280     {
7281         /* Reading from texture_rectangles is pretty straightforward, just use the unmodified
7282          * texture coordinate. It is still a good idea to clamp it though, since the opengl texture
7283          * is bigger
7284          */
7285         shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n");
7286         shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n");
7287         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
7288     }
7289     *luminance = 'a';
7290 
7291     return TRUE;
7292 }
7293 
7294 /* Context activation is done by the caller. */
7295 static GLuint gen_p8_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
7296 {
7297     GLuint shader;
7298     struct wined3d_string_buffer buffer;
7299     const char *tex_target = arbfp_texture_target(type->res_type);
7300 
7301     /* This should not happen because we only use this conversion for
7302      * present blits which don't use color keying. */
7303     if (type->use_color_key)
7304         FIXME("Implement P8 color keying.\n");
7305 
7306     /* Shader header */
7307     if (!string_buffer_init(&buffer))
7308     {
7309         ERR("Failed to initialize shader buffer.\n");
7310         return 0;
7311     }
7312 
7313     GL_EXTCALL(glGenProgramsARB(1, &shader));
7314     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
7315     if (!shader)
7316     {
7317         string_buffer_free(&buffer);
7318         return 0;
7319     }
7320 
7321     shader_addline(&buffer, "!!ARBfp1.0\n");
7322     shader_addline(&buffer, "TEMP index;\n");
7323 
7324     /* { 255/256, 0.5/255*255/256, 0, 0 } */
7325     shader_addline(&buffer, "PARAM constants = { 0.996, 0.00195, 0, 0 };\n");
7326 
7327     /* The alpha-component contains the palette index */
7328     shader_addline(&buffer, "TEX index, fragment.texcoord[0], texture[0], %s;\n", tex_target);
7329 
7330     /* Scale the index by 255/256 and add a bias of '0.5' in order to sample in the middle */
7331     shader_addline(&buffer, "MAD index.a, index.a, constants.x, constants.y;\n");
7332 
7333     /* Use the alpha-component as an index in the palette to get the final color */
7334     shader_addline(&buffer, "TEX result.color, index.a, texture[1], 1D;\n");
7335     shader_addline(&buffer, "END\n");
7336 
7337     shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);
7338 
7339     string_buffer_free(&buffer);
7340 
7341     return shader;
7342 }
7343 
7344 /* Context activation is done by the caller. */
7345 static void upload_palette(struct wined3d_arbfp_blitter *blitter,
7346         const struct wined3d_texture *texture, struct wined3d_context *context)
7347 {
7348     const struct wined3d_palette *palette = texture->swapchain ? texture->swapchain->palette : NULL;
7349     const struct wined3d_gl_info *gl_info = context->gl_info;
7350 
7351     if (!blitter->palette_texture)
7352         gl_info->gl_ops.gl.p_glGenTextures(1, &blitter->palette_texture);
7353 
7354     GL_EXTCALL(glActiveTexture(GL_TEXTURE1));
7355     gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_1D, blitter->palette_texture);
7356 
7357     gl_info->gl_ops.gl.p_glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
7358 
7359     gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
7360     /* Make sure we have discrete color levels. */
7361     gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
7362     gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
7363     /* TODO: avoid unneeded uploads in the future by adding some SFLAG_PALETTE_DIRTY mechanism */
7364     if (palette)
7365     {
7366         gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 256, 0, GL_BGRA,
7367                 GL_UNSIGNED_INT_8_8_8_8_REV, palette->colors);
7368     }
7369     else
7370     {
7371         static const DWORD black;
7372         FIXME("P8 surface loaded without a palette.\n");
7373         gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 1, 0, GL_BGRA,
7374                 GL_UNSIGNED_INT_8_8_8_8_REV, &black);
7375     }
7376 
7377     /* Switch back to unit 0 in which the 2D texture will be stored. */
7378     context_active_texture(context, gl_info, 0);
7379 }
7380 
7381 /* Context activation is done by the caller. */
7382 static GLuint gen_yuv_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
7383 {
7384     GLuint shader;
7385     struct wined3d_string_buffer buffer;
7386     char luminance_component;
7387 
7388     if (type->use_color_key)
7389         FIXME("Implement YUV color keying.\n");
7390 
7391     /* Shader header */
7392     if (!string_buffer_init(&buffer))
7393     {
7394         ERR("Failed to initialize shader buffer.\n");
7395         return 0;
7396     }
7397 
7398     GL_EXTCALL(glGenProgramsARB(1, &shader));
7399     checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");
7400     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
7401     checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
7402     if (!shader)
7403     {
7404         string_buffer_free(&buffer);
7405         return 0;
7406     }
7407 
7408     /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel,
7409      * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and
7410      * two chroma(U and V) values. Each macropixel has two luminance values, one for
7411      * each single pixel it contains, and one U and one V value shared between both
7412      * pixels.
7413      *
7414      * The data is loaded into an A8L8 texture. With YUY2, the luminance component
7415      * contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus
7416      * take the format into account when generating the read swizzles
7417      *
7418      * Reading the Y value is straightforward - just sample the texture. The hardware
7419      * takes care of filtering in the horizontal and vertical direction.
7420      *
7421      * Reading the U and V values is harder. We have to avoid filtering horizontally,
7422      * because that would mix the U and V values of one pixel or two adjacent pixels.
7423      * Thus floor the texture coordinate and add 0.5 to get an unfiltered read,
7424      * regardless of the filtering setting. Vertical filtering works automatically
7425      * though - the U and V values of two rows are mixed nicely.
7426      *
7427      * Apart of avoiding filtering issues, the code has to know which value it just
7428      * read, and where it can find the other one. To determine this, it checks if
7429      * it sampled an even or odd pixel, and shifts the 2nd read accordingly.
7430      *
7431      * Handling horizontal filtering of U and V values requires reading a 2nd pair
7432      * of pixels, extracting U and V and mixing them. This is not implemented yet.
7433      *
7434      * An alternative implementation idea is to load the texture as A8R8G8B8 texture,
7435      * with width / 2. This way one read gives all 3 values, finding U and V is easy
7436      * in an unfiltered situation. Finding the luminance on the other hand requires
7437      * finding out if it is an odd or even pixel. The real drawback of this approach
7438      * is filtering. This would have to be emulated completely in the shader, reading
7439      * up two 2 packed pixels in up to 2 rows and interpolating both horizontally and
7440      * vertically. Beyond that it would require adjustments to the texture handling
7441      * code to deal with the width scaling
7442      */
7443     shader_addline(&buffer, "!!ARBfp1.0\n");
7444     shader_addline(&buffer, "TEMP luminance;\n");
7445     shader_addline(&buffer, "TEMP temp;\n");
7446     shader_addline(&buffer, "TEMP chroma;\n");
7447     shader_addline(&buffer, "TEMP texcrd;\n");
7448     shader_addline(&buffer, "TEMP texcrd2;\n");
7449     shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.25};\n");
7450     shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n");
7451     shader_addline(&buffer, "PARAM size = program.local[%u];\n", ARBFP_BLIT_PARAM_SIZE);
7452 
7453     switch (type->fixup)
7454     {
7455         case COMPLEX_FIXUP_UYVY:
7456         case COMPLEX_FIXUP_YUY2:
7457             if (!gen_planar_yuv_read(&buffer, type, &luminance_component))
7458             {
7459                 string_buffer_free(&buffer);
7460                 return 0;
7461             }
7462             break;
7463 
7464         case COMPLEX_FIXUP_YV12:
7465             if (!gen_yv12_read(&buffer, type, &luminance_component))
7466             {
7467                 string_buffer_free(&buffer);
7468                 return 0;
7469             }
7470             break;
7471 
7472         case COMPLEX_FIXUP_NV12:
7473             if (!gen_nv12_read(&buffer, type, &luminance_component))
7474             {
7475                 string_buffer_free(&buffer);
7476                 return 0;
7477             }
7478             break;
7479 
7480         default:
7481             FIXME("Unsupported YUV fixup %#x\n", type->fixup);
7482             string_buffer_free(&buffer);
7483             return 0;
7484     }
7485 
7486     /* Calculate the final result. Formula is taken from
7487      * http://www.fourcc.org/fccyvrgb.php. Note that the chroma
7488      * ranges from -0.5 to 0.5
7489      */
7490     shader_addline(&buffer, "SUB chroma.xy, chroma, coef.y;\n");
7491 
7492     shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component);
7493     shader_addline(&buffer, "MAD temp.x, -chroma.y, yuv_coef.y, luminance.%c;\n", luminance_component);
7494     shader_addline(&buffer, "MAD result.color.y, -chroma.x, yuv_coef.z, temp.x;\n");
7495     shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component);
7496     shader_addline(&buffer, "END\n");
7497 
7498     shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);
7499 
7500     string_buffer_free(&buffer);
7501 
7502     return shader;
7503 }
7504 
7505 /* Context activation is done by the caller. */
7506 static GLuint arbfp_gen_plain_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
7507 {
7508     GLuint shader;
7509     struct wined3d_string_buffer buffer;
7510     const char *tex_target = arbfp_texture_target(type->res_type);
7511 
7512     /* Shader header */
7513     if (!string_buffer_init(&buffer))
7514     {
7515         ERR("Failed to initialize shader buffer.\n");
7516         return 0;
7517     }
7518 
7519     GL_EXTCALL(glGenProgramsARB(1, &shader));
7520     if (!shader)
7521     {
7522         string_buffer_free(&buffer);
7523         return 0;
7524     }
7525     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
7526 
7527     shader_addline(&buffer, "!!ARBfp1.0\n");
7528 
7529     if (type->use_color_key)
7530     {
7531         shader_addline(&buffer, "TEMP color;\n");
7532         shader_addline(&buffer, "TEMP less, greater;\n");
7533         shader_addline(&buffer, "PARAM color_key_low = program.local[%u];\n", ARBFP_BLIT_PARAM_COLOR_KEY_LOW);
7534         shader_addline(&buffer, "PARAM color_key_high = program.local[%u];\n", ARBFP_BLIT_PARAM_COLOR_KEY_HIGH);
7535         shader_addline(&buffer, "TEX color, fragment.texcoord[0], texture[0], %s;\n", tex_target);
7536         shader_addline(&buffer, "SLT less, color, color_key_low;\n"); /* below low key */
7537         shader_addline(&buffer, "SGE greater, color, color_key_high;\n"); /* above high key */
7538         shader_addline(&buffer, "ADD less, less, greater;\n"); /* or */
7539         shader_addline(&buffer, "DP4 less.b, less, less;\n"); /* on any channel */
7540         shader_addline(&buffer, "SGE less, -less.b, 0.0;\n"); /* logical not */
7541         shader_addline(&buffer, "KIL -less;\n"); /* discard if true */
7542         shader_addline(&buffer, "MOV result.color, color;\n");
7543     }
7544     else
7545     {
7546         shader_addline(&buffer, "TEX result.color, fragment.texcoord[0], texture[0], %s;\n", tex_target);
7547     }
7548 
7549     shader_addline(&buffer, "END\n");
7550 
7551     shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);
7552 
7553     string_buffer_free(&buffer);
7554 
7555     return shader;
7556 }
7557 
7558 /* Context activation is done by the caller. */
7559 static HRESULT arbfp_blit_set(struct wined3d_arbfp_blitter *blitter, struct wined3d_context *context,
7560         const struct wined3d_surface *surface, const struct wined3d_color_key *color_key)
7561 {
7562     const struct wined3d_texture *texture = surface->container;
7563     enum complex_fixup fixup;
7564     const struct wined3d_gl_info *gl_info = context->gl_info;
7565     struct wine_rb_entry *entry;
7566     struct arbfp_blit_type type;
7567     struct arbfp_blit_desc *desc;
7568     struct wined3d_color float_color_key[2];
7569     struct wined3d_vec4 size;
7570     GLuint shader;
7571 
7572     size.x = wined3d_texture_get_level_pow2_width(texture, surface->texture_level);
7573     size.y = wined3d_texture_get_level_pow2_height(texture, surface->texture_level);
7574     size.z = 1.0f;
7575     size.w = 1.0f;
7576 
7577     if (is_complex_fixup(texture->resource.format->color_fixup))
7578         fixup = get_complex_fixup(texture->resource.format->color_fixup);
7579     else
7580         fixup = COMPLEX_FIXUP_NONE;
7581 
7582     switch (texture->target)
7583     {
7584         case GL_TEXTURE_1D:
7585             type.res_type = WINED3D_GL_RES_TYPE_TEX_1D;
7586             break;
7587 
7588         case GL_TEXTURE_2D:
7589             type.res_type = WINED3D_GL_RES_TYPE_TEX_2D;
7590             break;
7591 
7592         case GL_TEXTURE_3D:
7593             type.res_type = WINED3D_GL_RES_TYPE_TEX_3D;
7594             break;
7595 
7596         case GL_TEXTURE_CUBE_MAP_ARB:
7597             type.res_type = WINED3D_GL_RES_TYPE_TEX_CUBE;
7598             break;
7599 
7600         case GL_TEXTURE_RECTANGLE_ARB:
7601             type.res_type = WINED3D_GL_RES_TYPE_TEX_RECT;
7602             break;
7603 
7604         default:
7605             ERR("Unexpected GL texture type %#x.\n", texture->target);
7606             type.res_type = WINED3D_GL_RES_TYPE_TEX_2D;
7607     }
7608     type.fixup = fixup;
7609     type.use_color_key = !!color_key;
7610     type.padding = 0;
7611 
7612     if ((entry = wine_rb_get(&blitter->shaders, &type)))
7613     {
7614         desc = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry);
7615         shader = desc->shader;
7616     }
7617     else
7618     {
7619         switch (fixup)
7620         {
7621             case COMPLEX_FIXUP_NONE:
7622                 if (!is_identity_fixup(texture->resource.format->color_fixup))
7623                     FIXME("Implement support for sign or swizzle fixups.\n");
7624                 shader = arbfp_gen_plain_shader(gl_info, &type);
7625                 break;
7626 
7627             case COMPLEX_FIXUP_P8:
7628                 shader = gen_p8_shader(gl_info, &type);
7629                 break;
7630 
7631             case COMPLEX_FIXUP_YUY2:
7632             case COMPLEX_FIXUP_UYVY:
7633             case COMPLEX_FIXUP_YV12:
7634             case COMPLEX_FIXUP_NV12:
7635                 shader = gen_yuv_shader(gl_info, &type);
7636                 break;
7637         }
7638 
7639         if (!shader)
7640         {
7641             FIXME("Unsupported complex fixup %#x, not setting a shader\n", fixup);
7642             return E_NOTIMPL;
7643         }
7644 
7645         desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*desc));
7646         if (!desc)
7647             goto err_out;
7648 
7649         desc->type = type;
7650         desc->shader = shader;
7651         if (wine_rb_put(&blitter->shaders, &desc->type, &desc->entry) == -1)
7652         {
7653 err_out:
7654             ERR("Out of memory\n");
7655             GL_EXTCALL(glDeleteProgramsARB(1, &shader));
7656             checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader))");
7657             GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0));
7658             checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)");
7659             HeapFree(GetProcessHeap(), 0, desc);
7660             return E_OUTOFMEMORY;
7661         }
7662     }
7663 
7664     if (fixup == COMPLEX_FIXUP_P8)
7665         upload_palette(blitter, texture, context);
7666 
7667     gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB);
7668     checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
7669     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
7670     checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
7671     GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARBFP_BLIT_PARAM_SIZE, &size.x));
7672     checkGLcall("glProgramLocalParameter4fvARB");
7673     if (type.use_color_key)
7674     {
7675         wined3d_format_get_float_color_key(texture->resource.format, color_key, float_color_key);
7676         GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
7677                 ARBFP_BLIT_PARAM_COLOR_KEY_LOW, &float_color_key[0].r));
7678         GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
7679                 ARBFP_BLIT_PARAM_COLOR_KEY_HIGH, &float_color_key[1].r));
7680         checkGLcall("glProgramLocalParameter4fvARB");
7681     }
7682 
7683     return WINED3D_OK;
7684 }
7685 
7686 /* Context activation is done by the caller. */
7687 static void arbfp_blit_unset(const struct wined3d_gl_info *gl_info)
7688 {
7689     gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB);
7690     checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
7691 }
7692 
7693 static BOOL arbfp_blit_supported(const struct wined3d_gl_info *gl_info,
7694         const struct wined3d_d3d_info *d3d_info, enum wined3d_blit_op blit_op,
7695         enum wined3d_pool src_pool, const struct wined3d_format *src_format, DWORD src_location,
7696         enum wined3d_pool dst_pool, const struct wined3d_format *dst_format, DWORD dst_location)
7697 {
7698     enum complex_fixup src_fixup;
7699     BOOL decompress;
7700 
7701     if (!gl_info->supported[ARB_FRAGMENT_PROGRAM])
7702         return FALSE;
7703 
7704     if (blit_op == WINED3D_BLIT_OP_RAW_BLIT && dst_format->id == src_format->id)
7705     {
7706         if (dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
7707             blit_op = WINED3D_BLIT_OP_DEPTH_BLIT;
7708         else
7709             blit_op = WINED3D_BLIT_OP_COLOR_BLIT;
7710     }
7711 
7712     switch (blit_op)
7713     {
7714         case WINED3D_BLIT_OP_COLOR_BLIT_CKEY:
7715             if (!d3d_info->shader_color_key)
7716             {
7717                 /* The conversion modifies the alpha channel so the color key might no longer match. */
7718                 TRACE("Color keying not supported with converted textures.\n");
7719                 return FALSE;
7720             }
7721         case WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST:
7722         case WINED3D_BLIT_OP_COLOR_BLIT:
7723             break;
7724 
7725         default:
7726             TRACE("Unsupported blit_op=%d\n", blit_op);
7727             return FALSE;
7728     }
7729 
7730     decompress = src_format && (src_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED)
7731             && !(dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED);
7732     if (!decompress && (dst_pool == WINED3D_POOL_SYSTEM_MEM || src_pool == WINED3D_POOL_SYSTEM_MEM))
7733         return FALSE;
7734 
7735     src_fixup = get_complex_fixup(src_format->color_fixup);
7736     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
7737     {
7738         TRACE("Checking support for fixup:\n");
7739         dump_color_fixup_desc(src_format->color_fixup);
7740     }
7741 
7742     if (!is_identity_fixup(dst_format->color_fixup)
7743             && (dst_format->id != src_format->id || dst_location != WINED3D_LOCATION_DRAWABLE))
7744     {
7745         TRACE("Destination fixups are not supported\n");
7746         return FALSE;
7747     }
7748 
7749     if (is_identity_fixup(src_format->color_fixup))
7750     {
7751         TRACE("[OK]\n");
7752         return TRUE;
7753     }
7754 
7755      /* We only support YUV conversions. */
7756     if (!is_complex_fixup(src_format->color_fixup))
7757     {
7758         if (wined3d_settings.offscreen_rendering_mode == ORM_BACKBUFFER)
7759         {
7760             WARN("Claiming fixup support because of ORM_BACKBUFFER.\n");
7761             return TRUE;
7762         }
7763 
7764         TRACE("[FAILED]\n");
7765         return FALSE;
7766     }
7767 
7768     switch(src_fixup)
7769     {
7770         case COMPLEX_FIXUP_YUY2:
7771         case COMPLEX_FIXUP_UYVY:
7772         case COMPLEX_FIXUP_YV12:
7773         case COMPLEX_FIXUP_NV12:
7774         case COMPLEX_FIXUP_P8:
7775             TRACE("[OK]\n");
7776             return TRUE;
7777 
7778         default:
7779             FIXME("Unsupported YUV fixup %#x\n", src_fixup);
7780             TRACE("[FAILED]\n");
7781             return FALSE;
7782     }
7783 }
7784 
7785 static DWORD arbfp_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_blit_op op,
7786         struct wined3d_context *context, struct wined3d_surface *src_surface, DWORD src_location,
7787         const RECT *src_rect, struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect,
7788         const struct wined3d_color_key *color_key, enum wined3d_texture_filter_type filter)
7789 {
7790     struct wined3d_texture *src_texture = src_surface->container;
7791     struct wined3d_texture *dst_texture = dst_surface->container;
7792     struct wined3d_device *device = dst_texture->resource.device;
7793     struct wined3d_arbfp_blitter *arbfp_blitter;
7794     struct wined3d_color_key alpha_test_key;
7795     struct wined3d_blitter *next;
7796     RECT s, d;
7797 
7798     if (!arbfp_blit_supported(&device->adapter->gl_info, &device->adapter->d3d_info, op,
7799             src_texture->resource.pool, src_texture->resource.format, src_location,
7800             dst_texture->resource.pool, dst_texture->resource.format, dst_location))
7801     {
7802         if ((next = blitter->next))
7803             return next->ops->blitter_blit(next, op, context, src_surface, src_location,
7804                     src_rect, dst_surface, dst_location, dst_rect, color_key, filter);
7805     }
7806 
7807     arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter);
7808 
7809     /* Now load the surface */
7810     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
7811             && (surface_get_sub_resource(src_surface)->locations
7812             & (WINED3D_LOCATION_TEXTURE_RGB | WINED3D_LOCATION_DRAWABLE))
7813             == WINED3D_LOCATION_DRAWABLE
7814             && !wined3d_resource_is_offscreen(&src_texture->resource))
7815     {
7816         /* Without FBO blits transferring from the drawable to the texture is
7817          * expensive, because we have to flip the data in sysmem. Since we can
7818          * flip in the blitter, we don't actually need that flip anyway. So we
7819          * use the surface's texture as scratch texture, and flip the source
7820          * rectangle instead. */
7821         surface_load_fb_texture(src_surface, FALSE, context);
7822 
7823         s = *src_rect;
7824         s.top = wined3d_texture_get_level_height(src_texture, src_surface->texture_level) - s.top;
7825         s.bottom = wined3d_texture_get_level_height(src_texture, src_surface->texture_level) - s.bottom;
7826         src_rect = &s;
7827     }
7828     else
7829         wined3d_texture_load(src_texture, context, FALSE);
7830 
7831     context_apply_blit_state(context, device);
7832 
7833     if (dst_location == WINED3D_LOCATION_DRAWABLE)
7834     {
7835         d = *dst_rect;
7836         surface_translate_drawable_coords(dst_surface, context->win_handle, &d);
7837         dst_rect = &d;
7838     }
7839 
7840     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
7841     {
7842         GLenum buffer;
7843 
7844         if (dst_location == WINED3D_LOCATION_DRAWABLE)
7845         {
7846             TRACE("Destination surface %p is onscreen.\n", dst_surface);
7847             buffer = wined3d_texture_get_gl_buffer(dst_texture);
7848         }
7849         else
7850         {
7851             TRACE("Destination surface %p is offscreen.\n", dst_surface);
7852             buffer = GL_COLOR_ATTACHMENT0;
7853         }
7854         context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
7855         context_set_draw_buffer(context, buffer);
7856         context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
7857         context_invalidate_state(context, STATE_FRAMEBUFFER);
7858     }
7859 
7860     if (op == WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST)
7861     {
7862         const struct wined3d_format *fmt = src_texture->resource.format;
7863         alpha_test_key.color_space_low_value = 0;
7864         alpha_test_key.color_space_high_value = ~(((1u << fmt->alpha_size) - 1) << fmt->alpha_offset);
7865         color_key = &alpha_test_key;
7866     }
7867 
7868     arbfp_blit_set(arbfp_blitter, context, src_surface, color_key);
7869 
7870     /* Draw a textured quad */
7871     draw_textured_quad(src_surface, context, src_rect, dst_rect, filter);
7872 
7873     /* Leave the opengl state valid for blitting */
7874     arbfp_blit_unset(context->gl_info);
7875 
7876     if (wined3d_settings.strict_draw_ordering
7877             || (dst_texture->swapchain && (dst_texture->swapchain->front_buffer == dst_texture)))
7878         context->gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
7879 
7880     return dst_location;
7881 }
7882 
7883 static void arbfp_blitter_clear(struct wined3d_blitter *blitter, struct wined3d_device *device,
7884         unsigned int rt_count, const struct wined3d_fb_state *fb, unsigned int rect_count, const RECT *clear_rects,
7885         const RECT *draw_rect, DWORD flags, const struct wined3d_color *colour, float depth, DWORD stencil)
7886 {
7887     struct wined3d_blitter *next;
7888 
7889     if ((next = blitter->next))
7890         next->ops->blitter_clear(next, device, rt_count, fb, rect_count,
7891                 clear_rects, draw_rect, flags, colour, depth, stencil);
7892 }
7893 
7894 static const struct wined3d_blitter_ops arbfp_blitter_ops =
7895 {
7896     arbfp_blitter_destroy,
7897     arbfp_blitter_clear,
7898     arbfp_blitter_blit,
7899 };
7900 
7901 void wined3d_arbfp_blitter_create(struct wined3d_blitter **next, const struct wined3d_device *device)
7902 {
7903     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7904     struct wined3d_arbfp_blitter *blitter;
7905 
7906     if (device->shader_backend != &arb_program_shader_backend
7907             && device->shader_backend != &glsl_shader_backend)
7908         return;
7909 
7910     if (!gl_info->supported[ARB_FRAGMENT_PROGRAM])
7911         return;
7912 
7913     if (!gl_info->supported[WINED3D_GL_LEGACY_CONTEXT])
7914         return;
7915 
7916     if (!(blitter = HeapAlloc(GetProcessHeap(), 0, sizeof(*blitter))))
7917     {
7918         ERR("Failed to allocate blitter.\n");
7919         return;
7920     }
7921 
7922     TRACE("Created blitter %p.\n", blitter);
7923 
7924     blitter->blitter.ops = &arbfp_blitter_ops;
7925     blitter->blitter.next = *next;
7926     wine_rb_init(&blitter->shaders, arbfp_blit_type_compare);
7927     blitter->palette_texture = 0;
7928     *next = &blitter->blitter;
7929 }
7930