/*
 * Pixel and vertex shaders implementation using ARB_vertex_program
 * and ARB_fragment_program GL extensions.
 *
 * Copyright 2002-2003 Jason Edmeades
 * Copyright 2002-2003 Raphael Junqueira
 * Copyright 2004 Christian Costa
 * Copyright 2005 Oliver Stieber
 * Copyright 2006 Ivan Gyurdiev
 * Copyright 2006 Jason Green
 * Copyright 2006 Henri Verbeet
 * Copyright 2007-2011, 2013-2014 Stefan Dösinger for CodeWeavers
 * Copyright 2009 Henri Verbeet for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
#include "wine/port.h"

#include <stdio.h>

#include "wined3d_private.h"

WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
WINE_DECLARE_DEBUG_CHANNEL(d3d);
WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);

/* Returns TRUE if 'type' is WINED3D_SHADER_TYPE_PIXEL. */
static BOOL shader_is_pshader_version(enum wined3d_shader_type type)
{
    return type == WINED3D_SHADER_TYPE_PIXEL;
}

/* Returns TRUE if 'type' is WINED3D_SHADER_TYPE_VERTEX. */
static BOOL shader_is_vshader_version(enum wined3d_shader_type type)
{
    return type == WINED3D_SHADER_TYPE_VERTEX;
}

/* Line iterator over a NUL-terminated text.
 *
 * Returns a pointer to the start of the current line and advances *ptr to
 * the character after the next '\n'. For a final line without a trailing
 * newline, *ptr is advanced to the terminating NUL. Returns NULL once *ptr
 * already points at the terminating NUL.
 *
 * The returned pointer refers into the original text; the line is not
 * NUL-terminated at the newline, so callers have to derive its length from
 * the updated *ptr. */
static const char *get_line(const char **ptr)
{
    const char *p, *q;

    p = *ptr;
    if (!(q = strstr(p, "\n")))
    {
        /* No newline left: either we're at the end, or this is the last line. */
        if (!*p) return NULL;
        *ptr += strlen(p);
        return p;
    }
    *ptr = q + 1;

    return p;
}

/* Symbolic names for the helper values resolved by arb_get_helper_value(). */
enum arb_helper_value
{
    ARB_ZERO,
    ARB_ONE,
    ARB_TWO,
    ARB_0001,
    ARB_EPS,

    ARB_VS_REL_OFFSET
};

/* Maps a helper value to the program text (constant name plus swizzle) that
 * provides it for the given shader type.
 *
 * Only vertex and pixel shaders are handled; for any other type an ERR is
 * logged and "bad" is returned. If the shader type has no dedicated constant
 * for 'value' (in the visible tables that is ARB_VS_REL_OFFSET for pixel
 * shaders), a FIXME is logged and an immediate literal is returned instead,
 * or "bad" if no literal exists either. */
static const char *arb_get_helper_value(enum wined3d_shader_type shader, enum arb_helper_value value)
{
    if (shader != WINED3D_SHADER_TYPE_VERTEX && shader != WINED3D_SHADER_TYPE_PIXEL)
    {
        ERR("Unsupported shader type '%s'.\n", debug_shader_type(shader));
        return "bad";
    }

    if (shader == WINED3D_SHADER_TYPE_PIXEL)
    {
        switch (value)
        {
            case ARB_ZERO: return "ps_helper_const.x";
            case ARB_ONE: return "ps_helper_const.y";
            case ARB_TWO: return "coefmul.x";
            case ARB_0001: return "ps_helper_const.xxxy";
            case ARB_EPS: return "ps_helper_const.z";
            default: break;
        }
    }
    else
    {
        switch (value)
        {
            case ARB_ZERO: return "helper_const.x";
            case ARB_ONE: return "helper_const.y";
            case ARB_TWO: return "helper_const.z";
            case ARB_EPS: return "helper_const.w";
            case ARB_0001: return "helper_const.xxxy";
            case ARB_VS_REL_OFFSET: return "rel_addr_const.y";
        }
    }
    /* Fallback: no named constant available, hand out an immediate value. */
    FIXME("Unmanaged %s shader helper constant requested: %u.\n",
          shader == WINED3D_SHADER_TYPE_PIXEL ? "pixel" : "vertex", value);
    switch (value)
    {
        case ARB_ZERO: return "0.0";
        case ARB_ONE: return "1.0";
        case ARB_TWO: return "2.0";
        case ARB_0001: return "{0.0, 0.0, 0.0, 1.0}";
        case ARB_EPS: return "1e-8";
        default: return "bad";
    }
}

/* Returns TRUE if context->lowest_disabled_stage is below 7.
 * NOTE(review): going by the name, this decides whether clipping can be
 * emulated in the fixed function path (presumably a spare texture stage is
 * needed) - confirm against the callers. */
static inline BOOL ffp_clip_emul(const struct wined3d_context *context)
{
    return context->lowest_disabled_stage < 7;
}

/* ARB_program_shader private data */

/* One entry of shader_arb_ctx_priv.control_frames.
 * NOTE(review): presumably one frame per open if/ifc/loop/rep construct while
 * generating code - confirm against the flow control handlers. */
struct control_frame
{
    struct list entry;
    enum
    {
        IF,
        IFC,
        LOOP,
        REP
    } type;
    BOOL muting;
    BOOL outer_loop;
    union
    {
        unsigned int loop;
        unsigned int ifc;
    } no;
    struct wined3d_shader_loop_control loop_control;
    BOOL had_else;
};

struct arb_ps_np2fixup_info
{
    struct ps_np2fixup_info super;
    /* For ARB we need an offset value:
     * With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) are stored in a
     * consecutive way (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone"
     * array we need an offset to the index inside the program local parameter array. */
    UINT offset;
};

/* Compile-time arguments of an ARB fragment program variant, on top of the
 * backend independent ps_compile_args. */
struct arb_ps_compile_args
{
    struct ps_compile_args super;
    WORD bools;
    WORD clip;  /* only a boolean, use a WORD for alignment */
    unsigned char loop_ctrl[WINED3D_MAX_CONSTS_I][3];
};

/* Associates a texture unit with the index of the program constant that
 * carries its data (see shader_arb_ps_local_constants()). */
struct stb_const_desc
{
    unsigned char texunit;
    UINT const_num;
};

/* One compiled fragment program variant together with the indices of the
 * program local constants it needs. The loader treats a const_num /
 * ycorrection / int_consts[] value of WINED3D_CONST_NUM_UNUSED as
 * "not needed". */
struct arb_ps_compiled_shader
{
    struct arb_ps_compile_args args;
    struct arb_ps_np2fixup_info np2fixup_info;
    struct stb_const_desc bumpenvmatconst[MAX_TEXTURES];
    struct stb_const_desc luminanceconst[MAX_TEXTURES];
    UINT int_consts[WINED3D_MAX_CONSTS_I];
    GLuint prgId;
    UINT ycorrection;
    unsigned char numbumpenvmatconsts;
    char num_int_consts;
};

/* Compile-time arguments of an ARB vertex program variant.
 * NOTE(review): the DWORD members of the unions alias the byte sized fields,
 * presumably so they can be compared in a single operation - do not reorder
 * or resize the fields without checking the comparison code. */
struct arb_vs_compile_args
{
    struct vs_compile_args super;
    union
    {
        struct
        {
            WORD bools;
            unsigned char clip_texcoord;
            unsigned char clipplane_mask;
        } boolclip;
        DWORD boolclip_compare;
    } clip;
    DWORD ps_signature;
    union
    {
        unsigned char samplers[4];
        DWORD samplers_compare;
    } vertex;
    unsigned char loop_ctrl[WINED3D_MAX_CONSTS_I][3];
};

/* One compiled vertex program variant. pos_fixup and int_consts[] are
 * program local parameter indices (see shader_arb_vs_local_constants()). */
struct arb_vs_compiled_shader
{
    struct arb_vs_compile_args args;
    GLuint prgId;
    UINT int_consts[WINED3D_MAX_CONSTS_I];
    char num_int_consts;
    char need_color_unclamp;
    UINT pos_fixup;
};

/* Entry of shader_arb_ctx_priv.record: a stored copy of an instruction. */
struct recorded_instruction
{
    struct wined3d_shader_instruction ins;
    struct list entry;
};

/* Per-compilation state of the ARB code generator; reachable from the
 * instruction handlers through ins->ctx->backend_data. */
struct shader_arb_ctx_priv
{
    /* Source string last loaded into A0.x by shader_arb_request_a0(). */
    char addr_reg[20];
    enum
    {
        /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */
        ARB,
        /* GL_NV_vertex_program2_option or GL_NV_fragment_program_option */
        NV2,
        /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */
        NV3
    } target_version;

    const struct arb_vs_compile_args *cur_vs_args;
    const struct arb_ps_compile_args *cur_ps_args;
    const struct arb_ps_compiled_shader *compiled_fprog;
    const struct arb_vs_compiled_shader *compiled_vprog;
    struct arb_ps_np2fixup_info *cur_np2fixup_info;
    struct list control_frames;
    struct list record;
    BOOL recording;
    BOOL muted;
    unsigned int num_loops, loop_depth, num_ifcs;
    int aL;
    BOOL ps_post_process;

    unsigned int vs_clipplanes;
    BOOL footer_written;
    BOOL in_main_func;

    /* For 3.0 vertex shaders */
    const char *vs_output[MAX_REG_OUTPUT];
    /* For 2.x and earlier vertex shaders */
    const char *texcrd_output[8], *color_output[2], *fog_output;

    /* 3.0 pshader input for compatibility with fixed function */
    const char *ps_input[MAX_REG_INPUT];
};

/* Node of a red-black tree of shader signatures (see
 * shader_arb_priv.signature_tree); idx is the number assigned to the
 * signature. */
struct ps_signature
{
    struct wined3d_shader_signature sig;
    DWORD idx;
    struct wine_rb_entry entry;
};

/* Backend data of a pixel shader: the array of compiled variants. */
struct arb_pshader_private {
    struct arb_ps_compiled_shader *gl_shaders;
    UINT num_gl_shaders, shader_array_size;
    DWORD input_signature_idx;
    DWORD clipplane_emulation;
    BOOL clamp_consts;
};

/* Backend data of a vertex shader (shader->backend_data): the array of
 * compiled variants. A non-zero rel_offset forces the relative addressing
 * helper constant (see need_rel_addr_const()). */
struct arb_vshader_private {
    struct arb_vs_compiled_shader *gl_shaders;
    UINT num_gl_shaders, shader_array_size;
    UINT rel_offset;
};

/* Device wide state of the ARB shader backend (device->shader_priv). */
struct shader_arb_priv
{
    GLuint                  current_vprogram_id;
    GLuint                  current_fprogram_id;
    const struct arb_ps_compiled_shader *compiled_fprog;
    const struct arb_vs_compiled_shader *compiled_vprog;
    BOOL                    use_arbfp_fixed_func;
    struct wine_rb_tree     fragment_shaders;
    BOOL                    last_ps_const_clamped;
    BOOL                    last_vs_color_unclamp;

    struct wine_rb_tree     signature_tree;
    DWORD ps_sig_number;

    /* Dirty tracking for the float constants: one flag per constant, plus the
     * number of leading entries that have to be scanned on the next upload. */
    unsigned int highest_dirty_ps_const, highest_dirty_vs_const;
    char vshader_const_dirty[WINED3D_MAX_VS_CONSTS_F];
    char pshader_const_dirty[WINED3D_MAX_PS_CONSTS_F];
    /* Context the dirty flags refer to; a different context triggers a full
     * reload in shader_arb_load_constants_internal(). */
    const struct wined3d_context *last_context;

    const struct wined3d_vertex_pipe_ops *vertex_pipe;
    const struct fragment_pipeline *fragment_pipe;
    BOOL ffp_proj_control;
};

/* Context activation for
state handlers is done by the caller. */ 312 313 static BOOL need_rel_addr_const(const struct arb_vshader_private *shader_data, 314 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 315 { 316 if (shader_data->rel_offset) return TRUE; 317 if (!reg_maps->usesmova) return FALSE; 318 return !gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]; 319 } 320 321 /* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */ 322 static inline BOOL use_nv_clip(const struct wined3d_gl_info *gl_info) 323 { 324 return gl_info->supported[NV_VERTEX_PROGRAM2_OPTION] 325 && !(gl_info->quirks & WINED3D_QUIRK_NV_CLIP_BROKEN); 326 } 327 328 static BOOL need_helper_const(const struct arb_vshader_private *shader_data, 329 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 330 { 331 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) return TRUE; 332 if (!gl_info->supported[NV_VERTEX_PROGRAM]) return TRUE; /* Need to init colors. */ 333 if (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) return TRUE; /* Load the immval offset. */ 334 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) return TRUE; /* Have to init texcoords. */ 335 if (!use_nv_clip(gl_info)) return TRUE; /* Init the clip texcoord */ 336 if (reg_maps->usesnrm) return TRUE; /* 0.0 */ 337 if (reg_maps->usespow) return TRUE; /* EPS, 0.0 and 1.0 */ 338 if (reg_maps->fog) return TRUE; /* Clamping fog coord, 0.0 and 1.0 */ 339 return FALSE; 340 } 341 342 static unsigned int reserved_vs_const(const struct arb_vshader_private *shader_data, 343 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 344 { 345 unsigned int ret = 1; 346 /* We use one PARAM for the pos fixup, and in some cases one to load 347 * some immediate values into the shader. 
*/ 348 if (need_helper_const(shader_data, reg_maps, gl_info)) ++ret; 349 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) ++ret; 350 return ret; 351 } 352 353 /* Loads floating point constants into the currently set ARB_vertex/fragment_program. 354 * When constant_list == NULL, it will load all the constants. 355 * 356 * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders) 357 * or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders) 358 */ 359 /* Context activation is done by the caller. */ 360 static unsigned int shader_arb_load_constants_f(const struct wined3d_shader *shader, 361 const struct wined3d_gl_info *gl_info, GLuint target_type, unsigned int max_constants, 362 const struct wined3d_vec4 *constants, char *dirty_consts) 363 { 364 struct wined3d_shader_lconst *lconst; 365 unsigned int ret, i, j; 366 367 if (TRACE_ON(d3d_constants)) 368 { 369 for (i = 0; i < max_constants; ++i) 370 { 371 if (!dirty_consts[i]) 372 continue; 373 TRACE_(d3d_constants)("Loading constant %u: %s.\n", i, debug_vec4(&constants[i])); 374 } 375 } 376 377 i = 0; 378 379 /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */ 380 if (target_type == GL_FRAGMENT_PROGRAM_ARB && shader->reg_maps.shader_version.major == 1) 381 { 382 float lcl_const[4]; 383 /* ps 1.x supports only 8 constants, clamp only those. 
When switching between 1.x and higher 384 * shaders, the first 8 constants are marked dirty for reload 385 */ 386 for (; i < min(8, max_constants); ++i) 387 { 388 if (!dirty_consts[i]) 389 continue; 390 dirty_consts[i] = 0; 391 392 if (constants[i].x > 1.0f) 393 lcl_const[0] = 1.0f; 394 else if (constants[i].x < -1.0f) 395 lcl_const[0] = -1.0f; 396 else 397 lcl_const[0] = constants[i].x; 398 399 if (constants[i].y > 1.0f) 400 lcl_const[1] = 1.0f; 401 else if (constants[i].y < -1.0f) 402 lcl_const[1] = -1.0f; 403 else 404 lcl_const[1] = constants[i].y; 405 406 if (constants[i].z > 1.0f) 407 lcl_const[2] = 1.0f; 408 else if (constants[i].z < -1.0f) 409 lcl_const[2] = -1.0f; 410 else 411 lcl_const[2] = constants[i].z; 412 413 if (constants[i].w > 1.0f) 414 lcl_const[3] = 1.0f; 415 else if (constants[i].w < -1.0f) 416 lcl_const[3] = -1.0f; 417 else 418 lcl_const[3] = constants[i].w; 419 420 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const)); 421 } 422 423 /* If further constants are dirty, reload them without clamping. 424 * 425 * The alternative is not to touch them, but then we cannot reset the dirty constant count 426 * to zero. 
That's bad for apps that only use PS 1.x shaders, because in that case the code 427 * above would always re-check the first 8 constants since max_constant remains at the init 428 * value 429 */ 430 } 431 432 if (gl_info->supported[EXT_GPU_PROGRAM_PARAMETERS]) 433 { 434 /* TODO: Benchmark if we're better of with finding the dirty constants ourselves, 435 * or just reloading *all* constants at once 436 * 437 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4))); 438 */ 439 for (; i < max_constants; ++i) 440 { 441 if (!dirty_consts[i]) 442 continue; 443 444 /* Find the next block of dirty constants */ 445 dirty_consts[i] = 0; 446 j = i; 447 for (++i; (i < max_constants) && dirty_consts[i]; ++i) 448 { 449 dirty_consts[i] = 0; 450 } 451 452 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, &constants[j].x)); 453 } 454 } 455 else 456 { 457 for (; i < max_constants; ++i) 458 { 459 if (dirty_consts[i]) 460 { 461 dirty_consts[i] = 0; 462 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, &constants[i].x)); 463 } 464 } 465 } 466 checkGLcall("glProgramEnvParameter4fvARB()"); 467 468 /* Load immediate constants */ 469 if (shader->load_local_constsF) 470 { 471 if (TRACE_ON(d3d_shader)) 472 { 473 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 474 { 475 GLfloat* values = (GLfloat*)lconst->value; 476 TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx, 477 values[0], values[1], values[2], values[3]); 478 } 479 } 480 /* Immediate constants are clamped for 1.X shaders at loading times */ 481 ret = 0; 482 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 483 { 484 dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */ 485 ret = max(ret, lconst->idx + 1); 486 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value)); 487 } 488 
checkGLcall("glProgramEnvParameter4fvARB()"); 489 return ret; /* The loaded immediate constants need reloading for the next shader */ 490 } else { 491 return 0; /* No constants are dirty now */ 492 } 493 } 494 495 /* Loads the texture dimensions for NP2 fixup into the currently set 496 * ARB_[vertex/fragment]_programs. */ 497 static void shader_arb_load_np2fixup_constants(const struct arb_ps_np2fixup_info *fixup, 498 const struct wined3d_gl_info *gl_info, const struct wined3d_state *state) 499 { 500 GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS]; 501 WORD active = fixup->super.active; 502 UINT i; 503 504 if (!active) 505 return; 506 507 for (i = 0; active; active >>= 1, ++i) 508 { 509 const struct wined3d_texture *tex = state->textures[i]; 510 unsigned char idx = fixup->super.idx[i]; 511 GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4]; 512 513 if (!(active & 1)) 514 continue; 515 516 if (!tex) 517 { 518 ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n"); 519 continue; 520 } 521 522 if (idx % 2) 523 { 524 tex_dim[2] = tex->pow2_matrix[0]; 525 tex_dim[3] = tex->pow2_matrix[5]; 526 } 527 else 528 { 529 tex_dim[0] = tex->pow2_matrix[0]; 530 tex_dim[1] = tex->pow2_matrix[5]; 531 } 532 } 533 534 for (i = 0; i < fixup->super.num_consts; ++i) 535 { 536 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 537 fixup->offset + i, &np2fixup_constants[i * 4])); 538 } 539 } 540 541 /* Context activation is done by the caller. 
*/ 542 static void shader_arb_ps_local_constants(const struct arb_ps_compiled_shader *gl_shader, 543 const struct wined3d_context *context, const struct wined3d_state *state, UINT rt_height) 544 { 545 const struct wined3d_gl_info *gl_info = context->gl_info; 546 unsigned char i; 547 548 for(i = 0; i < gl_shader->numbumpenvmatconsts; i++) 549 { 550 int texunit = gl_shader->bumpenvmatconst[i].texunit; 551 552 /* The state manager takes care that this function is always called if the bump env matrix changes */ 553 const float *data = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_MAT00]; 554 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 555 gl_shader->bumpenvmatconst[i].const_num, data)); 556 557 if (gl_shader->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED) 558 { 559 /* WINED3D_TSS_BUMPENVLSCALE and WINED3D_TSS_BUMPENVLOFFSET are next to each other. 560 * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we 561 * don't care about them. The pointers are valid for sure because the stateblock is bigger. 562 * (they're WINED3D_TSS_TEXTURETRANSFORMFLAGS and WINED3D_TSS_ADDRESSW, so most likely 0 or NaN 563 */ 564 const float *scale = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_LSCALE]; 565 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 566 gl_shader->luminanceconst[i].const_num, scale)); 567 } 568 } 569 checkGLcall("Load bumpmap consts"); 570 571 if(gl_shader->ycorrection != WINED3D_CONST_NUM_UNUSED) 572 { 573 /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen). 574 * ycorrection.y: -1.0(onscreen), 1.0(offscreen) 575 * ycorrection.z: 1.0 576 * ycorrection.w: 0.0 577 */ 578 float val[4]; 579 val[0] = context->render_offscreen ? 0.0f : (float) rt_height; 580 val[1] = context->render_offscreen ? 
1.0f : -1.0f; 581 val[2] = 1.0f; 582 val[3] = 0.0f; 583 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->ycorrection, val)); 584 checkGLcall("y correction loading"); 585 } 586 587 if (!gl_shader->num_int_consts) return; 588 589 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 590 { 591 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 592 { 593 float val[4]; 594 val[0] = (float)state->ps_consts_i[i].x; 595 val[1] = (float)state->ps_consts_i[i].y; 596 val[2] = (float)state->ps_consts_i[i].z; 597 val[3] = -1.0f; 598 599 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->int_consts[i], val)); 600 } 601 } 602 checkGLcall("Load ps int consts"); 603 } 604 605 /* Context activation is done by the caller. */ 606 static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *gl_shader, 607 const struct wined3d_context *context, const struct wined3d_state *state) 608 { 609 const struct wined3d_gl_info *gl_info = context->gl_info; 610 float position_fixup[4]; 611 unsigned char i; 612 613 /* Upload the position fixup */ 614 shader_get_position_fixup(context, state, 1, position_fixup); 615 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, position_fixup)); 616 617 if (!gl_shader->num_int_consts) return; 618 619 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 620 { 621 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 622 { 623 float val[4]; 624 val[0] = (float)state->vs_consts_i[i].x; 625 val[1] = (float)state->vs_consts_i[i].y; 626 val[2] = (float)state->vs_consts_i[i].z; 627 val[3] = -1.0f; 628 629 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->int_consts[i], val)); 630 } 631 } 632 checkGLcall("Load vs int consts"); 633 } 634 635 static void shader_arb_select(void *shader_priv, struct wined3d_context *context, 636 const struct wined3d_state *state); 637 638 /** 639 * Loads the app-supplied constants into the currently set 
ARB_[vertex/fragment]_programs. 640 * 641 * We only support float constants in ARB at the moment, so don't 642 * worry about the Integers or Booleans 643 */ 644 /* Context activation is done by the caller (state handler). */ 645 static void shader_arb_load_constants_internal(struct shader_arb_priv *priv, 646 struct wined3d_context *context, const struct wined3d_state *state, 647 BOOL usePixelShader, BOOL useVertexShader, BOOL from_shader_select) 648 { 649 const struct wined3d_d3d_info *d3d_info = context->d3d_info; 650 const struct wined3d_gl_info *gl_info = context->gl_info; 651 652 if (!from_shader_select) 653 { 654 const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX]; 655 const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 656 657 if (vshader 658 && (vshader->reg_maps.boolean_constants 659 || (!gl_info->supported[NV_VERTEX_PROGRAM2_OPTION] 660 && (vshader->reg_maps.integer_constants & ~vshader->reg_maps.local_int_consts)))) 661 { 662 TRACE("bool/integer vertex shader constants potentially modified, forcing shader reselection.\n"); 663 shader_arb_select(priv, context, state); 664 } 665 else if (pshader 666 && (pshader->reg_maps.boolean_constants 667 || (!gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION] 668 && (pshader->reg_maps.integer_constants & ~pshader->reg_maps.local_int_consts)))) 669 { 670 TRACE("bool/integer pixel shader constants potentially modified, forcing shader reselection.\n"); 671 shader_arb_select(priv, context, state); 672 } 673 } 674 675 if (context != priv->last_context) 676 { 677 memset(priv->vshader_const_dirty, 1, 678 sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count); 679 priv->highest_dirty_vs_const = d3d_info->limits.vs_uniform_count; 680 681 memset(priv->pshader_const_dirty, 1, 682 sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count); 683 priv->highest_dirty_ps_const = d3d_info->limits.ps_uniform_count; 684 685 priv->last_context = context; 
686 } 687 688 if (useVertexShader) 689 { 690 const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX]; 691 const struct arb_vs_compiled_shader *gl_shader = priv->compiled_vprog; 692 693 /* Load DirectX 9 float constants for vertex shader */ 694 priv->highest_dirty_vs_const = shader_arb_load_constants_f(vshader, gl_info, GL_VERTEX_PROGRAM_ARB, 695 priv->highest_dirty_vs_const, state->vs_consts_f, priv->vshader_const_dirty); 696 shader_arb_vs_local_constants(gl_shader, context, state); 697 } 698 699 if (usePixelShader) 700 { 701 const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 702 const struct arb_ps_compiled_shader *gl_shader = priv->compiled_fprog; 703 UINT rt_height = state->fb->render_targets[0]->height; 704 705 /* Load DirectX 9 float constants for pixel shader */ 706 priv->highest_dirty_ps_const = shader_arb_load_constants_f(pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB, 707 priv->highest_dirty_ps_const, state->ps_consts_f, priv->pshader_const_dirty); 708 shader_arb_ps_local_constants(gl_shader, context, state, rt_height); 709 710 if (context->constant_update_mask & WINED3D_SHADER_CONST_PS_NP2_FIXUP) 711 shader_arb_load_np2fixup_constants(&gl_shader->np2fixup_info, gl_info, state); 712 } 713 } 714 715 static void shader_arb_load_constants(void *shader_priv, struct wined3d_context *context, 716 const struct wined3d_state *state) 717 { 718 BOOL vs = use_vs(state); 719 BOOL ps = use_ps(state); 720 721 shader_arb_load_constants_internal(shader_priv, context, state, ps, vs, FALSE); 722 } 723 724 static void shader_arb_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count) 725 { 726 struct wined3d_context *context = context_get_current(); 727 struct shader_arb_priv *priv = device->shader_priv; 728 729 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 730 * context. 
On a context switch the old context will be fully dirtified */ 731 if (!context || context->device != device) 732 return; 733 734 memset(priv->vshader_const_dirty + start, 1, sizeof(*priv->vshader_const_dirty) * count); 735 priv->highest_dirty_vs_const = max(priv->highest_dirty_vs_const, start + count); 736 } 737 738 static void shader_arb_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) 739 { 740 struct wined3d_context *context = context_get_current(); 741 struct shader_arb_priv *priv = device->shader_priv; 742 743 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 744 * context. On a context switch the old context will be fully dirtified */ 745 if (!context || context->device != device) 746 return; 747 748 memset(priv->pshader_const_dirty + start, 1, sizeof(*priv->pshader_const_dirty) * count); 749 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, start + count); 750 } 751 752 static void shader_arb_append_imm_vec4(struct wined3d_string_buffer *buffer, const float *values) 753 { 754 char str[4][17]; 755 756 wined3d_ftoa(values[0], str[0]); 757 wined3d_ftoa(values[1], str[1]); 758 wined3d_ftoa(values[2], str[2]); 759 wined3d_ftoa(values[3], str[3]); 760 shader_addline(buffer, "{%s, %s, %s, %s}", str[0], str[1], str[2], str[3]); 761 } 762 763 /* Generate the variable & register declarations for the ARB_vertex_program output target */ 764 static void shader_generate_arb_declarations(const struct wined3d_shader *shader, 765 const struct wined3d_shader_reg_maps *reg_maps, struct wined3d_string_buffer *buffer, 766 const struct wined3d_gl_info *gl_info, DWORD *num_clipplanes, 767 const struct shader_arb_ctx_priv *ctx) 768 { 769 DWORD i; 770 char pshader = shader_is_pshader_version(reg_maps->shader_version.type); 771 const struct wined3d_shader_lconst *lconst; 772 unsigned max_constantsF; 773 DWORD map; 774 775 /* In pixel shaders, all private constants are program local, we 
don't need anything 776 * from program.env. Thus we can advertise the full set of constants in pixel shaders. 777 * If we need a private constant the GL implementation will squeeze it in somewhere 778 * 779 * With vertex shaders we need the posFixup and on some GL implementations 4 helper 780 * immediate values. The posFixup is loaded using program.env for now, so always 781 * subtract one from the number of constants. If the shader uses indirect addressing, 782 * account for the helper const too because we have to declare all available d3d constants 783 * and don't know which are actually used. 784 */ 785 if (pshader) 786 { 787 max_constantsF = gl_info->limits.arb_ps_native_constants; 788 /* 24 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. */ 789 if (max_constantsF < 24) 790 max_constantsF = gl_info->limits.arb_ps_float_constants; 791 } 792 else 793 { 794 const struct arb_vshader_private *shader_data = shader->backend_data; 795 max_constantsF = gl_info->limits.arb_vs_native_constants; 796 /* 96 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. 797 * Also prevents max_constantsF from becoming less than 0 and 798 * wrapping . 
*/ 799 if (max_constantsF < 96) 800 max_constantsF = gl_info->limits.arb_vs_float_constants; 801 802 if (reg_maps->usesrelconstF) 803 { 804 DWORD highest_constf = 0, clip_limit; 805 806 max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info); 807 max_constantsF -= wined3d_popcount(reg_maps->integer_constants); 808 max_constantsF -= gl_info->reserved_arb_constants; 809 810 for (i = 0; i < shader->limits->constant_float; ++i) 811 { 812 DWORD idx = i >> 5; 813 DWORD shift = i & 0x1f; 814 if (reg_maps->constf[idx] & (1u << shift)) 815 highest_constf = i; 816 } 817 818 if(use_nv_clip(gl_info) && ctx->target_version >= NV2) 819 { 820 if(ctx->cur_vs_args->super.clip_enabled) 821 clip_limit = gl_info->limits.user_clip_distances; 822 else 823 clip_limit = 0; 824 } 825 else 826 { 827 unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask; 828 clip_limit = min(wined3d_popcount(mask), 4); 829 } 830 *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1); 831 max_constantsF -= *num_clipplanes; 832 if(*num_clipplanes < clip_limit) 833 { 834 WARN("Only %u clip planes out of %u enabled.\n", *num_clipplanes, 835 gl_info->limits.user_clip_distances); 836 } 837 } 838 else 839 { 840 if (ctx->target_version >= NV2) 841 *num_clipplanes = gl_info->limits.user_clip_distances; 842 else 843 *num_clipplanes = min(gl_info->limits.user_clip_distances, 4); 844 } 845 } 846 847 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 848 { 849 if (map & 1) shader_addline(buffer, "TEMP R%u;\n", i); 850 } 851 852 for (i = 0, map = reg_maps->address; map; map >>= 1, ++i) 853 { 854 if (map & 1) shader_addline(buffer, "ADDRESS A%u;\n", i); 855 } 856 857 if (pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3) 858 { 859 for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i) 860 { 861 if (map & 1) shader_addline(buffer, "TEMP T%u;\n", i); 862 } 863 } 864 865 if (!shader->load_local_constsF) 866 { 867 
LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 868 { 869 const float *value; 870 value = (const float *)lconst->value; 871 shader_addline(buffer, "PARAM C%u = ", lconst->idx); 872 shader_arb_append_imm_vec4(buffer, value); 873 shader_addline(buffer, ";\n"); 874 } 875 } 876 877 /* After subtracting privately used constants from the hardware limit(they are loaded as 878 * local constants), make sure the shader doesn't violate the env constant limit 879 */ 880 if (pshader) 881 { 882 max_constantsF = min(max_constantsF, gl_info->limits.arb_ps_float_constants); 883 } 884 else 885 { 886 max_constantsF = min(max_constantsF, gl_info->limits.arb_vs_float_constants); 887 } 888 889 /* Avoid declaring more constants than needed */ 890 max_constantsF = min(max_constantsF, shader->limits->constant_float); 891 892 /* we use the array-based constants array if the local constants are marked for loading, 893 * because then we use indirect addressing, or when the local constant list is empty, 894 * because then we don't know if we're using indirect addressing or not. 
If we're hardcoding 895 * local constants do not declare the loaded constants as an array because ARB compilers usually 896 * do not optimize unused constants away 897 */ 898 if (reg_maps->usesrelconstF) 899 { 900 /* Need to PARAM the environment parameters (constants) so we can use relative addressing */ 901 shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n", 902 max_constantsF, max_constantsF - 1); 903 } 904 else 905 { 906 for (i = 0; i < max_constantsF; ++i) 907 { 908 if (!shader_constant_is_local(shader, i) && wined3d_extract_bits(reg_maps->constf, i, 1)) 909 { 910 shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i); 911 } 912 } 913 } 914 } 915 916 static const char * const shift_tab[] = { 917 "dummy", /* 0 (none) */ 918 "coefmul.x", /* 1 (x2) */ 919 "coefmul.y", /* 2 (x4) */ 920 "coefmul.z", /* 3 (x8) */ 921 "coefmul.w", /* 4 (x16) */ 922 "dummy", /* 5 (x32) */ 923 "dummy", /* 6 (x64) */ 924 "dummy", /* 7 (x128) */ 925 "dummy", /* 8 (d256) */ 926 "dummy", /* 9 (d128) */ 927 "dummy", /* 10 (d64) */ 928 "dummy", /* 11 (d32) */ 929 "coefdiv.w", /* 12 (d16) */ 930 "coefdiv.z", /* 13 (d8) */ 931 "coefdiv.y", /* 14 (d4) */ 932 "coefdiv.x" /* 15 (d2) */ 933 }; 934 935 static void shader_arb_get_write_mask(const struct wined3d_shader_instruction *ins, 936 const struct wined3d_shader_dst_param *dst, char *write_mask) 937 { 938 char *ptr = write_mask; 939 940 if (dst->write_mask != WINED3DSP_WRITEMASK_ALL) 941 { 942 *ptr++ = '.'; 943 if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x'; 944 if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y'; 945 if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z'; 946 if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w'; 947 } 948 949 *ptr = '\0'; 950 } 951 952 static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str) 953 { 954 /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra", 955 * but addressed as "rgba". 
To fix this we need to swap the register's x 956 * and z components. */ 957 const char *swizzle_chars = fixup ? "zyxw" : "xyzw"; 958 char *ptr = swizzle_str; 959 960 /* swizzle bits fields: wwzzyyxx */ 961 DWORD swizzle = param->swizzle; 962 DWORD swizzle_x = swizzle & 0x03; 963 DWORD swizzle_y = (swizzle >> 2) & 0x03; 964 DWORD swizzle_z = (swizzle >> 4) & 0x03; 965 DWORD swizzle_w = (swizzle >> 6) & 0x03; 966 967 /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to 968 * generate a swizzle string. Unless we need to our own swizzling. */ 969 if (swizzle != WINED3DSP_NOSWIZZLE || fixup) 970 { 971 *ptr++ = '.'; 972 if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) { 973 *ptr++ = swizzle_chars[swizzle_x]; 974 } else { 975 *ptr++ = swizzle_chars[swizzle_x]; 976 *ptr++ = swizzle_chars[swizzle_y]; 977 *ptr++ = swizzle_chars[swizzle_z]; 978 *ptr++ = swizzle_chars[swizzle_w]; 979 } 980 } 981 982 *ptr = '\0'; 983 } 984 985 static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src) 986 { 987 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 988 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 989 990 if (!strcmp(priv->addr_reg, src)) return; 991 992 strcpy(priv->addr_reg, src); 993 shader_addline(buffer, "ARL A0.x, %s;\n", src); 994 } 995 996 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 997 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr); 998 999 static void shader_arb_get_register_name(const struct wined3d_shader_instruction *ins, 1000 const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color) 1001 { 1002 /* oPos, oFog and oPts in D3D */ 1003 static const char * const rastout_reg_names[] = {"TMP_OUT", "TMP_FOGCOORD", "result.pointsize"}; 1004 const struct wined3d_shader *shader = ins->ctx->shader; 1005 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 1006 BOOL 
pshader = shader_is_pshader_version(reg_maps->shader_version.type); 1007 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1008 1009 *is_color = FALSE; 1010 1011 switch (reg->type) 1012 { 1013 case WINED3DSPR_TEMP: 1014 sprintf(register_name, "R%u", reg->idx[0].offset); 1015 break; 1016 1017 case WINED3DSPR_INPUT: 1018 if (pshader) 1019 { 1020 if (reg_maps->shader_version.major < 3) 1021 { 1022 if (!reg->idx[0].offset) 1023 strcpy(register_name, "fragment.color.primary"); 1024 else 1025 strcpy(register_name, "fragment.color.secondary"); 1026 } 1027 else 1028 { 1029 if (reg->idx[0].rel_addr) 1030 { 1031 char rel_reg[50]; 1032 shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg); 1033 1034 if (!strcmp(rel_reg, "**aL_emul**")) 1035 { 1036 DWORD idx = ctx->aL + reg->idx[0].offset; 1037 if(idx < MAX_REG_INPUT) 1038 { 1039 strcpy(register_name, ctx->ps_input[idx]); 1040 } 1041 else 1042 { 1043 ERR("Pixel shader input register out of bounds: %u\n", idx); 1044 sprintf(register_name, "out_of_bounds_%u", idx); 1045 } 1046 } 1047 else if (reg_maps->input_registers & 0x0300) 1048 { 1049 /* There are two ways basically: 1050 * 1051 * 1) Use the unrolling code that is used for loop emulation and unroll the loop. 1052 * That means trouble if the loop also contains a breakc or if the control values 1053 * aren't local constants. 1054 * 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the 1055 * source dynamically. The trouble is that we cannot simply read aL.y because it 1056 * is an ADDRESS register. 
We could however push it, load .zw with a value and use 1057 * ADAC to load the condition code register and pop it again afterwards 1058 */ 1059 FIXME("Relative input register addressing with more than 8 registers\n"); 1060 1061 /* This is better than nothing for now */ 1062 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset); 1063 } 1064 else if(ctx->cur_ps_args->super.vp_mode != WINED3D_VP_MODE_SHADER) 1065 { 1066 /* This is problematic because we'd have to consult the ctx->ps_input strings 1067 * for where to find the varying. Some may be "0.0", others can be texcoords or 1068 * colors. This needs either a pipeline replacement to make the vertex shader feed 1069 * proper varyings, or loop unrolling 1070 * 1071 * For now use the texcoords and hope for the best 1072 */ 1073 FIXME("Non-vertex shader varying input with indirect addressing\n"); 1074 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset); 1075 } 1076 else 1077 { 1078 /* D3D supports indirect addressing only with aL in loop registers. The loop instruction 1079 * pulls GL_NV_fragment_program2 in 1080 */ 1081 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset); 1082 } 1083 } 1084 else 1085 { 1086 if (reg->idx[0].offset < MAX_REG_INPUT) 1087 { 1088 strcpy(register_name, ctx->ps_input[reg->idx[0].offset]); 1089 } 1090 else 1091 { 1092 ERR("Pixel shader input register out of bounds: %u\n", reg->idx[0].offset); 1093 sprintf(register_name, "out_of_bounds_%u", reg->idx[0].offset); 1094 } 1095 } 1096 } 1097 } 1098 else 1099 { 1100 if (ctx->cur_vs_args->super.swizzle_map & (1u << reg->idx[0].offset)) 1101 *is_color = TRUE; 1102 sprintf(register_name, "vertex.attrib[%u]", reg->idx[0].offset); 1103 } 1104 break; 1105 1106 case WINED3DSPR_CONST: 1107 if (!pshader && reg->idx[0].rel_addr) 1108 { 1109 const struct arb_vshader_private *shader_data = shader->backend_data; 1110 UINT rel_offset = ctx->target_version == ARB ? 
shader_data->rel_offset : 0; 1111 BOOL aL = FALSE; 1112 char rel_reg[50]; 1113 if (reg_maps->shader_version.major < 2) 1114 { 1115 sprintf(rel_reg, "A0.x"); 1116 } 1117 else 1118 { 1119 shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg); 1120 if (ctx->target_version == ARB) 1121 { 1122 if (!strcmp(rel_reg, "**aL_emul**")) 1123 { 1124 aL = TRUE; 1125 } else { 1126 shader_arb_request_a0(ins, rel_reg); 1127 sprintf(rel_reg, "A0.x"); 1128 } 1129 } 1130 } 1131 if (aL) 1132 sprintf(register_name, "C[%u]", ctx->aL + reg->idx[0].offset); 1133 else if (reg->idx[0].offset >= rel_offset) 1134 sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx[0].offset - rel_offset); 1135 else 1136 sprintf(register_name, "C[%s - %u]", rel_reg, rel_offset - reg->idx[0].offset); 1137 } 1138 else 1139 { 1140 if (reg_maps->usesrelconstF) 1141 sprintf(register_name, "C[%u]", reg->idx[0].offset); 1142 else 1143 sprintf(register_name, "C%u", reg->idx[0].offset); 1144 } 1145 break; 1146 1147 case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */ 1148 if (pshader) 1149 { 1150 if (reg_maps->shader_version.major == 1 1151 && reg_maps->shader_version.minor <= 3) 1152 /* In ps <= 1.3, Tx is a temporary register as destination 1153 * to all instructions, and as source to most instructions. 1154 * For some instructions it is the texcoord input. Those 1155 * instructions know about the special use. */ 1156 sprintf(register_name, "T%u", reg->idx[0].offset); 1157 else 1158 /* In ps 1.4 and 2.x Tx is always a (read-only) varying. 
*/ 1159 sprintf(register_name, "fragment.texcoord[%u]", reg->idx[0].offset); 1160 } 1161 else 1162 { 1163 if (reg_maps->shader_version.major == 1 || ctx->target_version >= NV2) 1164 sprintf(register_name, "A%u", reg->idx[0].offset); 1165 else 1166 sprintf(register_name, "A%u_SHADOW", reg->idx[0].offset); 1167 } 1168 break; 1169 1170 case WINED3DSPR_COLOROUT: 1171 if (ctx->ps_post_process && !reg->idx[0].offset) 1172 { 1173 strcpy(register_name, "TMP_COLOR"); 1174 } 1175 else 1176 { 1177 if (ctx->cur_ps_args->super.srgb_correction) 1178 FIXME("sRGB correction on higher render targets.\n"); 1179 if (reg_maps->rt_mask > 1) 1180 sprintf(register_name, "result.color[%u]", reg->idx[0].offset); 1181 else 1182 strcpy(register_name, "result.color"); 1183 } 1184 break; 1185 1186 case WINED3DSPR_RASTOUT: 1187 if (reg->idx[0].offset == 1) 1188 sprintf(register_name, "%s", ctx->fog_output); 1189 else 1190 sprintf(register_name, "%s", rastout_reg_names[reg->idx[0].offset]); 1191 break; 1192 1193 case WINED3DSPR_DEPTHOUT: 1194 strcpy(register_name, "result.depth"); 1195 break; 1196 1197 case WINED3DSPR_ATTROUT: 1198 /* case WINED3DSPR_OUTPUT: */ 1199 if (pshader) 1200 sprintf(register_name, "oD[%u]", reg->idx[0].offset); 1201 else 1202 strcpy(register_name, ctx->color_output[reg->idx[0].offset]); 1203 break; 1204 1205 case WINED3DSPR_TEXCRDOUT: 1206 if (pshader) 1207 sprintf(register_name, "oT[%u]", reg->idx[0].offset); 1208 else if (reg_maps->shader_version.major < 3) 1209 strcpy(register_name, ctx->texcrd_output[reg->idx[0].offset]); 1210 else 1211 strcpy(register_name, ctx->vs_output[reg->idx[0].offset]); 1212 break; 1213 1214 case WINED3DSPR_LOOP: 1215 if(ctx->target_version >= NV2) 1216 { 1217 /* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */ 1218 if(pshader) sprintf(register_name, "A0.x"); 1219 else sprintf(register_name, "aL.y"); 1220 } 1221 else 1222 { 1223 /* Unfortunately this code cannot return the value of ctx->aL here. 
An immediate value 1224 * would be valid, but if aL is used for indexing(its only use), there's likely an offset, 1225 * thus the result would be something like C[15 + 30], which is not valid in the ARB program 1226 * grammar. So return a marker for the emulated aL and intercept it in constant and varying 1227 * indexing 1228 */ 1229 sprintf(register_name, "**aL_emul**"); 1230 } 1231 1232 break; 1233 1234 case WINED3DSPR_CONSTINT: 1235 sprintf(register_name, "I%u", reg->idx[0].offset); 1236 break; 1237 1238 case WINED3DSPR_MISCTYPE: 1239 if (!reg->idx[0].offset) 1240 sprintf(register_name, "vpos"); 1241 else if (reg->idx[0].offset == 1) 1242 sprintf(register_name, "fragment.facing.x"); 1243 else 1244 FIXME("Unknown MISCTYPE register index %u.\n", reg->idx[0].offset); 1245 break; 1246 1247 default: 1248 FIXME("Unhandled register type %#x[%u].\n", reg->type, reg->idx[0].offset); 1249 sprintf(register_name, "unrecognized_register[%u]", reg->idx[0].offset); 1250 break; 1251 } 1252 } 1253 1254 static void shader_arb_get_dst_param(const struct wined3d_shader_instruction *ins, 1255 const struct wined3d_shader_dst_param *wined3d_dst, char *str) 1256 { 1257 char register_name[255]; 1258 char write_mask[6]; 1259 BOOL is_color; 1260 1261 shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color); 1262 strcpy(str, register_name); 1263 1264 shader_arb_get_write_mask(ins, wined3d_dst, write_mask); 1265 strcat(str, write_mask); 1266 } 1267 1268 static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source) 1269 { 1270 switch(channel_source) 1271 { 1272 case CHANNEL_SOURCE_ZERO: return "0"; 1273 case CHANNEL_SOURCE_ONE: return "1"; 1274 case CHANNEL_SOURCE_X: return "x"; 1275 case CHANNEL_SOURCE_Y: return "y"; 1276 case CHANNEL_SOURCE_Z: return "z"; 1277 case CHANNEL_SOURCE_W: return "w"; 1278 default: 1279 FIXME("Unhandled channel source %#x\n", channel_source); 1280 return "undefined"; 1281 } 1282 } 1283 1284 struct 
color_fixup_masks 1285 { 1286 DWORD source; 1287 DWORD sign; 1288 }; 1289 1290 static struct color_fixup_masks calc_color_correction(struct color_fixup_desc fixup, DWORD dst_mask) 1291 { 1292 struct color_fixup_masks masks = {0, 0}; 1293 1294 if (is_complex_fixup(fixup)) 1295 { 1296 enum complex_fixup complex_fixup = get_complex_fixup(fixup); 1297 FIXME("Complex fixup (%#x) not supported\n", complex_fixup); 1298 return masks; 1299 } 1300 1301 if (fixup.x_source != CHANNEL_SOURCE_X) 1302 masks.source |= WINED3DSP_WRITEMASK_0; 1303 if (fixup.y_source != CHANNEL_SOURCE_Y) 1304 masks.source |= WINED3DSP_WRITEMASK_1; 1305 if (fixup.z_source != CHANNEL_SOURCE_Z) 1306 masks.source |= WINED3DSP_WRITEMASK_2; 1307 if (fixup.w_source != CHANNEL_SOURCE_W) 1308 masks.source |= WINED3DSP_WRITEMASK_3; 1309 masks.source &= dst_mask; 1310 1311 if (fixup.x_sign_fixup) 1312 masks.sign |= WINED3DSP_WRITEMASK_0; 1313 if (fixup.y_sign_fixup) 1314 masks.sign |= WINED3DSP_WRITEMASK_1; 1315 if (fixup.z_sign_fixup) 1316 masks.sign |= WINED3DSP_WRITEMASK_2; 1317 if (fixup.w_sign_fixup) 1318 masks.sign |= WINED3DSP_WRITEMASK_3; 1319 masks.sign &= dst_mask; 1320 1321 return masks; 1322 } 1323 1324 static void gen_color_correction(struct wined3d_string_buffer *buffer, const char *dst, 1325 const char *src, const char *one, const char *two, 1326 struct color_fixup_desc fixup, struct color_fixup_masks masks) 1327 { 1328 const char *sign_fixup_src = dst; 1329 1330 if (masks.source) 1331 { 1332 if (masks.sign) 1333 sign_fixup_src = "TA"; 1334 1335 shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", sign_fixup_src, src, 1336 shader_arb_get_fixup_swizzle(fixup.x_source), shader_arb_get_fixup_swizzle(fixup.y_source), 1337 shader_arb_get_fixup_swizzle(fixup.z_source), shader_arb_get_fixup_swizzle(fixup.w_source)); 1338 } 1339 else if (masks.sign) 1340 { 1341 sign_fixup_src = src; 1342 } 1343 1344 if (masks.sign) 1345 { 1346 char reg_mask[6]; 1347 char *ptr = reg_mask; 1348 1349 if (masks.sign != 
WINED3DSP_WRITEMASK_ALL) 1350 { 1351 *ptr++ = '.'; 1352 if (masks.sign & WINED3DSP_WRITEMASK_0) 1353 *ptr++ = 'x'; 1354 if (masks.sign & WINED3DSP_WRITEMASK_1) 1355 *ptr++ = 'y'; 1356 if (masks.sign & WINED3DSP_WRITEMASK_2) 1357 *ptr++ = 'z'; 1358 if (masks.sign & WINED3DSP_WRITEMASK_3) 1359 *ptr++ = 'w'; 1360 } 1361 *ptr = '\0'; 1362 1363 shader_addline(buffer, "MAD %s%s, %s, %s, -%s;\n", dst, reg_mask, sign_fixup_src, two, one); 1364 } 1365 } 1366 1367 static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins) 1368 { 1369 DWORD mod; 1370 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1371 if (!ins->dst_count) return ""; 1372 1373 mod = ins->dst[0].modifiers; 1374 1375 /* Silently ignore PARTIALPRECISION if it's not supported */ 1376 if(priv->target_version == ARB) mod &= ~WINED3DSPDM_PARTIALPRECISION; 1377 1378 if(mod & WINED3DSPDM_MSAMPCENTROID) 1379 { 1380 FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n"); 1381 mod &= ~WINED3DSPDM_MSAMPCENTROID; 1382 } 1383 1384 switch(mod) 1385 { 1386 case WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION: 1387 return "H_SAT"; 1388 1389 case WINED3DSPDM_SATURATE: 1390 return "_SAT"; 1391 1392 case WINED3DSPDM_PARTIALPRECISION: 1393 return "H"; 1394 1395 case 0: 1396 return ""; 1397 1398 default: 1399 FIXME("Unknown modifiers 0x%08x\n", mod); 1400 return ""; 1401 } 1402 } 1403 1404 #define TEX_PROJ 0x1 1405 #define TEX_BIAS 0x2 1406 #define TEX_LOD 0x4 1407 #define TEX_DERIV 0x10 1408 1409 static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx, 1410 const char *dst_str, const char *coord_reg, WORD flags, const char *dsx, const char *dsy) 1411 { 1412 enum wined3d_shader_resource_type resource_type = ins->ctx->reg_maps->resource_info[sampler_idx].type; 1413 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1414 const char *tex_type; 1415 BOOL np2_fixup = FALSE; 1416 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1417 const 
char *mod; 1418 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 1419 const char *tex_dst = dst_str; 1420 struct color_fixup_masks masks; 1421 1422 /* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */ 1423 if(!pshader) sampler_idx += MAX_FRAGMENT_SAMPLERS; 1424 1425 switch (resource_type) 1426 { 1427 case WINED3D_SHADER_RESOURCE_TEXTURE_1D: 1428 tex_type = "1D"; 1429 break; 1430 1431 case WINED3D_SHADER_RESOURCE_TEXTURE_2D: 1432 if (pshader && priv->cur_ps_args->super.np2_fixup & (1u << sampler_idx) 1433 && ins->ctx->gl_info->supported[ARB_TEXTURE_RECTANGLE]) 1434 tex_type = "RECT"; 1435 else 1436 tex_type = "2D"; 1437 if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) 1438 { 1439 if (priv->cur_np2fixup_info->super.active & (1u << sampler_idx)) 1440 { 1441 if (flags) FIXME("Only ordinary sampling from NP2 textures is supported.\n"); 1442 else np2_fixup = TRUE; 1443 } 1444 } 1445 break; 1446 1447 case WINED3D_SHADER_RESOURCE_TEXTURE_3D: 1448 tex_type = "3D"; 1449 break; 1450 1451 case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE: 1452 tex_type = "CUBE"; 1453 break; 1454 1455 default: 1456 ERR("Unexpected resource type %#x.\n", resource_type); 1457 tex_type = ""; 1458 } 1459 1460 /* TEX, TXL, TXD and TXP do not support the "H" modifier, 1461 * so don't use shader_arb_get_modifier 1462 */ 1463 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) mod = "_SAT"; 1464 else mod = ""; 1465 1466 /* Fragment samplers always have indentity mapping */ 1467 if(sampler_idx >= MAX_FRAGMENT_SAMPLERS) 1468 { 1469 sampler_idx = priv->cur_vs_args->vertex.samplers[sampler_idx - MAX_FRAGMENT_SAMPLERS]; 1470 } 1471 1472 if (pshader) 1473 { 1474 masks = calc_color_correction(priv->cur_ps_args->super.color_fixup[sampler_idx], 1475 ins->dst[0].write_mask); 1476 1477 if (masks.source || masks.sign) 1478 tex_dst = "TA"; 1479 } 1480 1481 if (flags & TEX_DERIV) 1482 { 1483 if(flags & TEX_PROJ) FIXME("Projected texture 
sampling with custom derivatives\n"); 1484 if(flags & TEX_BIAS) FIXME("Biased texture sampling with custom derivatives\n"); 1485 shader_addline(buffer, "TXD%s %s, %s, %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, 1486 dsx, dsy, sampler_idx, tex_type); 1487 } 1488 else if(flags & TEX_LOD) 1489 { 1490 if(flags & TEX_PROJ) FIXME("Projected texture sampling with explicit lod\n"); 1491 if(flags & TEX_BIAS) FIXME("Biased texture sampling with explicit lod\n"); 1492 shader_addline(buffer, "TXL%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, 1493 sampler_idx, tex_type); 1494 } 1495 else if (flags & TEX_BIAS) 1496 { 1497 /* Shouldn't be possible, but let's check for it */ 1498 if(flags & TEX_PROJ) FIXME("Biased and Projected texture sampling\n"); 1499 /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */ 1500 shader_addline(buffer, "TXB%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type); 1501 } 1502 else if (flags & TEX_PROJ) 1503 { 1504 shader_addline(buffer, "TXP%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type); 1505 } 1506 else 1507 { 1508 if (np2_fixup) 1509 { 1510 const unsigned char idx = priv->cur_np2fixup_info->super.idx[sampler_idx]; 1511 shader_addline(buffer, "MUL TA, np2fixup[%u].%s, %s;\n", idx >> 1, 1512 (idx % 2) ? 
"zwxy" : "xyzw", coord_reg); 1513 1514 shader_addline(buffer, "TEX%s %s, TA, texture[%u], %s;\n", mod, tex_dst, sampler_idx, tex_type); 1515 } 1516 else 1517 shader_addline(buffer, "TEX%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type); 1518 } 1519 1520 if (pshader) 1521 { 1522 gen_color_correction(buffer, dst_str, tex_dst, 1523 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_ONE), 1524 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_TWO), 1525 priv->cur_ps_args->super.color_fixup[sampler_idx], masks); 1526 } 1527 } 1528 1529 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 1530 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr) 1531 { 1532 /* Generate a line that does the input modifier computation and return the input register to use */ 1533 BOOL is_color = FALSE, insert_line; 1534 char regstr[256]; 1535 char swzstr[20]; 1536 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1537 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1538 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 1539 const char *two = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_TWO); 1540 1541 /* Assume a new line will be added */ 1542 insert_line = TRUE; 1543 1544 /* Get register name */ 1545 shader_arb_get_register_name(ins, &src->reg, regstr, &is_color); 1546 shader_arb_get_swizzle(src, is_color, swzstr); 1547 1548 switch (src->modifiers) 1549 { 1550 case WINED3DSPSM_NONE: 1551 sprintf(outregstr, "%s%s", regstr, swzstr); 1552 insert_line = FALSE; 1553 break; 1554 case WINED3DSPSM_NEG: 1555 sprintf(outregstr, "-%s%s", regstr, swzstr); 1556 insert_line = FALSE; 1557 break; 1558 case WINED3DSPSM_BIAS: 1559 shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr); 1560 break; 1561 case WINED3DSPSM_BIASNEG: 1562 shader_addline(buffer, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg, regstr); 1563 break; 
1564 case WINED3DSPSM_SIGN: 1565 shader_addline(buffer, "MAD T%c, %s, %s, -%s;\n", 'A' + tmpreg, regstr, two, one); 1566 break; 1567 case WINED3DSPSM_SIGNNEG: 1568 shader_addline(buffer, "MAD T%c, %s, -%s, %s;\n", 'A' + tmpreg, regstr, two, one); 1569 break; 1570 case WINED3DSPSM_COMP: 1571 shader_addline(buffer, "SUB T%c, %s, %s;\n", 'A' + tmpreg, one, regstr); 1572 break; 1573 case WINED3DSPSM_X2: 1574 shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr); 1575 break; 1576 case WINED3DSPSM_X2NEG: 1577 shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr); 1578 break; 1579 case WINED3DSPSM_DZ: 1580 shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr); 1581 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 1582 break; 1583 case WINED3DSPSM_DW: 1584 shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr); 1585 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 1586 break; 1587 case WINED3DSPSM_ABS: 1588 if(ctx->target_version >= NV2) { 1589 sprintf(outregstr, "|%s%s|", regstr, swzstr); 1590 insert_line = FALSE; 1591 } else { 1592 shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr); 1593 } 1594 break; 1595 case WINED3DSPSM_ABSNEG: 1596 if(ctx->target_version >= NV2) { 1597 sprintf(outregstr, "-|%s%s|", regstr, swzstr); 1598 } else { 1599 shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr); 1600 sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr); 1601 } 1602 insert_line = FALSE; 1603 break; 1604 default: 1605 sprintf(outregstr, "%s%s", regstr, swzstr); 1606 insert_line = FALSE; 1607 } 1608 1609 /* Return modified or original register, with swizzle */ 1610 if (insert_line) 1611 sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr); 1612 } 1613 1614 static void pshader_hw_bem(const struct wined3d_shader_instruction *ins) 1615 { 1616 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1617 struct wined3d_string_buffer 
*buffer = ins->ctx->buffer; 1618 DWORD sampler_code = dst->reg.idx[0].offset; 1619 char dst_name[50]; 1620 char src_name[2][50]; 1621 1622 shader_arb_get_dst_param(ins, dst, dst_name); 1623 1624 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 1625 * 1626 * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid 1627 * with bem. So delay loading the first parameter until after the perturbation calculation which needs two 1628 * temps is done. 1629 */ 1630 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1631 shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code); 1632 shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]); 1633 shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code); 1634 shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]); 1635 1636 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1637 shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]); 1638 } 1639 1640 static DWORD negate_modifiers(DWORD mod, char *extra_char) 1641 { 1642 *extra_char = ' '; 1643 switch(mod) 1644 { 1645 case WINED3DSPSM_NONE: return WINED3DSPSM_NEG; 1646 case WINED3DSPSM_NEG: return WINED3DSPSM_NONE; 1647 case WINED3DSPSM_BIAS: return WINED3DSPSM_BIASNEG; 1648 case WINED3DSPSM_BIASNEG: return WINED3DSPSM_BIAS; 1649 case WINED3DSPSM_SIGN: return WINED3DSPSM_SIGNNEG; 1650 case WINED3DSPSM_SIGNNEG: return WINED3DSPSM_SIGN; 1651 case WINED3DSPSM_COMP: *extra_char = '-'; return WINED3DSPSM_COMP; 1652 case WINED3DSPSM_X2: return WINED3DSPSM_X2NEG; 1653 case WINED3DSPSM_X2NEG: return WINED3DSPSM_X2; 1654 case WINED3DSPSM_DZ: *extra_char = '-'; return WINED3DSPSM_DZ; 1655 case WINED3DSPSM_DW: *extra_char = '-'; return WINED3DSPSM_DW; 1656 case WINED3DSPSM_ABS: return WINED3DSPSM_ABSNEG; 1657 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 1658 } 1659 FIXME("Unknown modifier %u\n", mod); 1660 
return mod; 1661 } 1662 1663 static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins) 1664 { 1665 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1666 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1667 char dst_name[50]; 1668 char src_name[3][50]; 1669 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 1670 ins->ctx->reg_maps->shader_version.minor); 1671 1672 shader_arb_get_dst_param(ins, dst, dst_name); 1673 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1674 1675 if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue 1676 && ins->dst->write_mask != WINED3DSP_WRITEMASK_3) 1677 { 1678 shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]); 1679 } 1680 else 1681 { 1682 struct wined3d_shader_src_param src0_copy = ins->src[0]; 1683 char extra_neg; 1684 1685 /* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */ 1686 src0_copy.modifiers = negate_modifiers(src0_copy.modifiers, &extra_neg); 1687 1688 shader_arb_get_src_param(ins, &src0_copy, 0, src_name[0]); 1689 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 1690 shader_addline(buffer, "ADD TA, %c%s, coefdiv.x;\n", extra_neg, src_name[0]); 1691 shader_addline(buffer, "CMP%s %s, TA, %s, %s;\n", shader_arb_get_modifier(ins), 1692 dst_name, src_name[1], src_name[2]); 1693 } 1694 } 1695 1696 static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins) 1697 { 1698 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1699 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1700 char dst_name[50]; 1701 char src_name[3][50]; 1702 1703 shader_arb_get_dst_param(ins, dst, dst_name); 1704 1705 /* Generate input register names (with modifiers) */ 1706 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1707 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1708 shader_arb_get_src_param(ins, &ins->src[2], 2, 
src_name[2]); 1709 1710 shader_addline(buffer, "CMP%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1711 dst_name, src_name[0], src_name[2], src_name[1]); 1712 } 1713 1714 /** Process the WINED3DSIO_DP2ADD instruction in ARB. 1715 * dst = dot2(src0, src1) + src2 */ 1716 static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins) 1717 { 1718 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1719 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1720 char dst_name[50]; 1721 char src_name[3][50]; 1722 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1723 1724 shader_arb_get_dst_param(ins, dst, dst_name); 1725 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1726 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 1727 1728 if(ctx->target_version >= NV3) 1729 { 1730 /* GL_NV_fragment_program2 has a 1:1 matching instruction */ 1731 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1732 shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1733 dst_name, src_name[0], src_name[1], src_name[2]); 1734 } 1735 else if(ctx->target_version >= NV2) 1736 { 1737 /* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y 1738 * dst.y = src2.?, src0.x, src1.z + src0.y * src1.w 1739 * dst.z = src2.?, src0.x, src1.x + src0.y * src1.y 1740 * dst.z = src2.?, src0.x, src1.z + src0.y * src1.w 1741 * 1742 * Make sure that src1.zw = src1.xy, then we get a classic dp2add 1743 * 1744 * .xyxy and other swizzles that we could get with this are not valid in 1745 * plain ARBfp, but luckily the NV extension grammar lifts this limitation. 
1746 */ 1747 struct wined3d_shader_src_param tmp_param = ins->src[1]; 1748 DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */ 1749 tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */ 1750 1751 shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]); 1752 1753 shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1754 dst_name, src_name[2], src_name[0], src_name[1]); 1755 } 1756 else 1757 { 1758 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1759 /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2] 1760 * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite 1761 */ 1762 shader_addline(buffer, "MOV TA, %s;\n", src_name[0]); 1763 shader_addline(buffer, "MOV TA.z, 0.0;\n"); 1764 shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]); 1765 shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]); 1766 } 1767 } 1768 1769 /* Map the opcode 1-to-1 to the GL code */ 1770 static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins) 1771 { 1772 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1773 const char *instruction; 1774 char arguments[256], dst_str[50]; 1775 unsigned int i; 1776 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1777 1778 switch (ins->handler_idx) 1779 { 1780 case WINED3DSIH_ABS: instruction = "ABS"; break; 1781 case WINED3DSIH_ADD: instruction = "ADD"; break; 1782 case WINED3DSIH_CRS: instruction = "XPD"; break; 1783 case WINED3DSIH_DP3: instruction = "DP3"; break; 1784 case WINED3DSIH_DP4: instruction = "DP4"; break; 1785 case WINED3DSIH_DST: instruction = "DST"; break; 1786 case WINED3DSIH_FRC: instruction = "FRC"; break; 1787 case WINED3DSIH_LIT: instruction = "LIT"; break; 1788 case WINED3DSIH_LRP: instruction = "LRP"; break; 1789 case WINED3DSIH_MAD: instruction = "MAD"; break; 1790 case WINED3DSIH_MAX: instruction = "MAX"; 
break; 1791 case WINED3DSIH_MIN: instruction = "MIN"; break; 1792 case WINED3DSIH_MOV: instruction = "MOV"; break; 1793 case WINED3DSIH_MUL: instruction = "MUL"; break; 1794 case WINED3DSIH_SGE: instruction = "SGE"; break; 1795 case WINED3DSIH_SLT: instruction = "SLT"; break; 1796 case WINED3DSIH_SUB: instruction = "SUB"; break; 1797 case WINED3DSIH_MOVA:instruction = "ARR"; break; 1798 case WINED3DSIH_DSX: instruction = "DDX"; break; 1799 default: instruction = ""; 1800 FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 1801 break; 1802 } 1803 1804 /* Note that shader_arb_add_dst_param() adds spaces. */ 1805 arguments[0] = '\0'; 1806 shader_arb_get_dst_param(ins, dst, dst_str); 1807 for (i = 0; i < ins->src_count; ++i) 1808 { 1809 char operand[100]; 1810 strcat(arguments, ", "); 1811 shader_arb_get_src_param(ins, &ins->src[i], i, operand); 1812 strcat(arguments, operand); 1813 } 1814 shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments); 1815 } 1816 1817 static void shader_hw_nop(const struct wined3d_shader_instruction *ins) {} 1818 1819 static DWORD shader_arb_select_component(DWORD swizzle, DWORD component) 1820 { 1821 return ((swizzle >> 2 * component) & 0x3) * 0x55; 1822 } 1823 1824 static void shader_hw_mov(const struct wined3d_shader_instruction *ins) 1825 { 1826 const struct wined3d_shader *shader = ins->ctx->shader; 1827 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 1828 BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type); 1829 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1830 const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO); 1831 const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE); 1832 const char *two = arb_get_helper_value(reg_maps->shader_version.type, ARB_TWO); 1833 1834 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1835 char src0_param[256]; 
1836 1837 if (ins->handler_idx == WINED3DSIH_MOVA) 1838 { 1839 const struct arb_vshader_private *shader_data = shader->backend_data; 1840 char write_mask[6]; 1841 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 1842 1843 if(ctx->target_version >= NV2) { 1844 shader_hw_map2gl(ins); 1845 return; 1846 } 1847 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param); 1848 shader_arb_get_write_mask(ins, &ins->dst[0], write_mask); 1849 1850 /* This implements the mova formula used in GLSL. The first two instructions 1851 * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0 1852 * in this case: 1853 * mova A0.x, 0.0 1854 * 1855 * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor 1856 * 1857 * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into 1858 * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign. 1859 */ 1860 shader_addline(buffer, "SGE A0_SHADOW%s, %s, %s;\n", write_mask, src0_param, zero); 1861 shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, %s, -%s;\n", write_mask, two, one); 1862 1863 shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param); 1864 shader_addline(buffer, "ADD TA%s, TA, rel_addr_const.x;\n", write_mask); 1865 shader_addline(buffer, "FLR TA%s, TA;\n", write_mask); 1866 if (shader_data->rel_offset) 1867 { 1868 shader_addline(buffer, "ADD TA%s, TA, %s;\n", write_mask, offset); 1869 } 1870 shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask); 1871 1872 ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0'; 1873 } 1874 else if (reg_maps->shader_version.major == 1 1875 && !shader_is_pshader_version(reg_maps->shader_version.type) 1876 && ins->dst[0].reg.type == WINED3DSPR_ADDR) 1877 { 1878 const struct arb_vshader_private *shader_data = shader->backend_data; 1879 src0_param[0] = '\0'; 1880 1881 if (shader_data->rel_offset && 
ctx->target_version == ARB) 1882 { 1883 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 1884 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param); 1885 shader_addline(buffer, "ADD TA.x, %s, %s;\n", src0_param, offset); 1886 shader_addline(buffer, "ARL A0.x, TA.x;\n"); 1887 } 1888 else 1889 { 1890 /* Apple's ARB_vertex_program implementation does not accept an ARL source argument 1891 * with more than one component. Thus replicate the first source argument over all 1892 * 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */ 1893 struct wined3d_shader_src_param tmp_src = ins->src[0]; 1894 tmp_src.swizzle = shader_arb_select_component(tmp_src.swizzle, 0); 1895 shader_arb_get_src_param(ins, &tmp_src, 0, src0_param); 1896 shader_addline(buffer, "ARL A0.x, %s;\n", src0_param); 1897 } 1898 } 1899 else if (ins->dst[0].reg.type == WINED3DSPR_COLOROUT && !ins->dst[0].reg.idx[0].offset && pshader) 1900 { 1901 if (ctx->ps_post_process && shader->u.ps.color0_mov) 1902 { 1903 shader_addline(buffer, "#mov handled in srgb write or fog code\n"); 1904 return; 1905 } 1906 shader_hw_map2gl(ins); 1907 } 1908 else 1909 { 1910 shader_hw_map2gl(ins); 1911 } 1912 } 1913 1914 static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins) 1915 { 1916 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1917 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1918 char reg_dest[40]; 1919 1920 /* No swizzles are allowed in d3d's texkill. 
PS 1.x ignores the 4th component as documented, 1921 * but >= 2.0 honors it (undocumented, but tested by the d3d9 testsuite) 1922 */ 1923 shader_arb_get_dst_param(ins, dst, reg_dest); 1924 1925 if (ins->ctx->reg_maps->shader_version.major >= 2) 1926 { 1927 const char *kilsrc = "TA"; 1928 BOOL is_color; 1929 1930 shader_arb_get_register_name(ins, &dst->reg, reg_dest, &is_color); 1931 if(dst->write_mask == WINED3DSP_WRITEMASK_ALL) 1932 { 1933 kilsrc = reg_dest; 1934 } 1935 else 1936 { 1937 /* Sigh. KIL doesn't support swizzles/writemasks. KIL passes a writemask, but ".xy" for example 1938 * is not valid as a swizzle in ARB (needs ".xyyy"). Use SWZ to load the register properly, and set 1939 * masked out components to 0(won't kill) 1940 */ 1941 char x = '0', y = '0', z = '0', w = '0'; 1942 if(dst->write_mask & WINED3DSP_WRITEMASK_0) x = 'x'; 1943 if(dst->write_mask & WINED3DSP_WRITEMASK_1) y = 'y'; 1944 if(dst->write_mask & WINED3DSP_WRITEMASK_2) z = 'z'; 1945 if(dst->write_mask & WINED3DSP_WRITEMASK_3) w = 'w'; 1946 shader_addline(buffer, "SWZ TA, %s, %c, %c, %c, %c;\n", reg_dest, x, y, z, w); 1947 } 1948 shader_addline(buffer, "KIL %s;\n", kilsrc); 1949 } 1950 else 1951 { 1952 /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component, 1953 * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL 1954 * 1955 * ps_1_3 shaders use the texcoord incarnation of the Tx register. 
ps_1_4 shaders can use the same, 1956 * or pass in any temporary register(in shader phase 2) 1957 */ 1958 if (ins->ctx->reg_maps->shader_version.minor <= 3) 1959 sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx[0].offset); 1960 else 1961 shader_arb_get_dst_param(ins, dst, reg_dest); 1962 shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest); 1963 shader_addline(buffer, "KIL TA;\n"); 1964 } 1965 } 1966 1967 static void pshader_hw_tex(const struct wined3d_shader_instruction *ins) 1968 { 1969 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1970 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1971 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 1972 ins->ctx->reg_maps->shader_version.minor); 1973 struct wined3d_shader_src_param src; 1974 1975 char reg_dest[40]; 1976 char reg_coord[40]; 1977 DWORD reg_sampler_code; 1978 WORD myflags = 0; 1979 BOOL swizzle_coord = FALSE; 1980 1981 /* All versions have a destination register */ 1982 shader_arb_get_dst_param(ins, dst, reg_dest); 1983 1984 /* 1.0-1.4: Use destination register number as texture code. 1985 2.0+: Use provided sampler number as texture code. */ 1986 if (shader_version < WINED3D_SHADER_VERSION(2,0)) 1987 reg_sampler_code = dst->reg.idx[0].offset; 1988 else 1989 reg_sampler_code = ins->src[1].reg.idx[0].offset; 1990 1991 /* 1.0-1.3: Use the texcoord varying. 1992 1.4+: Use provided coordinate source register. 
*/ 1993 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 1994 sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code); 1995 else { 1996 /* TEX is the only instruction that can handle DW and DZ natively */ 1997 src = ins->src[0]; 1998 if(src.modifiers == WINED3DSPSM_DW) src.modifiers = WINED3DSPSM_NONE; 1999 if(src.modifiers == WINED3DSPSM_DZ) src.modifiers = WINED3DSPSM_NONE; 2000 shader_arb_get_src_param(ins, &src, 0, reg_coord); 2001 } 2002 2003 /* projection flag: 2004 * 1.1, 1.2, 1.3: Use WINED3D_TSS_TEXTURETRANSFORMFLAGS 2005 * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0] 2006 * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode 2007 */ 2008 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 2009 { 2010 DWORD flags = 0; 2011 if (reg_sampler_code < MAX_TEXTURES) 2012 flags = priv->cur_ps_args->super.tex_transform >> reg_sampler_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 2013 if (flags & WINED3D_PSARGS_PROJECTED) 2014 { 2015 myflags |= TEX_PROJ; 2016 if ((flags & ~WINED3D_PSARGS_PROJECTED) == WINED3D_TTFF_COUNT3) 2017 swizzle_coord = TRUE; 2018 } 2019 } 2020 else if (shader_version < WINED3D_SHADER_VERSION(2,0)) 2021 { 2022 enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers; 2023 if (src_mod == WINED3DSPSM_DZ) 2024 { 2025 swizzle_coord = TRUE; 2026 myflags |= TEX_PROJ; 2027 } else if(src_mod == WINED3DSPSM_DW) { 2028 myflags |= TEX_PROJ; 2029 } 2030 } else { 2031 if (ins->flags & WINED3DSI_TEXLD_PROJECT) myflags |= TEX_PROJ; 2032 if (ins->flags & WINED3DSI_TEXLD_BIAS) myflags |= TEX_BIAS; 2033 } 2034 2035 if (swizzle_coord) 2036 { 2037 /* TXP cannot handle DZ natively, so move the z coordinate to .w. 
2038 * reg_coord is a read-only varying register, so we need a temp reg */ 2039 shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord); 2040 strcpy(reg_coord, "TA"); 2041 } 2042 2043 shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, myflags, NULL, NULL); 2044 } 2045 2046 static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins) 2047 { 2048 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2049 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2050 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 2051 ins->ctx->reg_maps->shader_version.minor); 2052 char dst_str[50]; 2053 2054 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 2055 { 2056 DWORD reg = dst->reg.idx[0].offset; 2057 2058 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2059 shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg); 2060 } else { 2061 char reg_src[40]; 2062 2063 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src); 2064 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2065 shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src); 2066 } 2067 } 2068 2069 static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins) 2070 { 2071 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2072 DWORD flags = 0; 2073 2074 DWORD reg1 = ins->dst[0].reg.idx[0].offset; 2075 char dst_str[50]; 2076 char src_str[50]; 2077 2078 /* Note that texreg2ar treats Tx as a temporary register, not as a varying */ 2079 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2080 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2081 /* Move .x first in case src_str is "TA" */ 2082 shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str); 2083 shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str); 2084 if (reg1 < MAX_TEXTURES) 2085 { 2086 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2087 flags = priv->cur_ps_args->super.tex_transform >> reg1 * 
WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 2088 } 2089 shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2090 } 2091 2092 static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins) 2093 { 2094 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2095 2096 DWORD reg1 = ins->dst[0].reg.idx[0].offset; 2097 char dst_str[50]; 2098 char src_str[50]; 2099 2100 /* Note that texreg2gb treats Tx as a temporary register, not as a varying */ 2101 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2102 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2103 shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str); 2104 shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str); 2105 shader_hw_sample(ins, reg1, dst_str, "TA", 0, NULL, NULL); 2106 } 2107 2108 static void pshader_hw_texreg2rgb(const struct wined3d_shader_instruction *ins) 2109 { 2110 DWORD reg1 = ins->dst[0].reg.idx[0].offset; 2111 char dst_str[50]; 2112 char src_str[50]; 2113 2114 /* Note that texreg2rg treats Tx as a temporary register, not as a varying */ 2115 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2116 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2117 shader_hw_sample(ins, reg1, dst_str, src_str, 0, NULL, NULL); 2118 } 2119 2120 static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins) 2121 { 2122 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2123 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2124 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2125 char reg_coord[40], dst_reg[50], src_reg[50]; 2126 DWORD reg_dest_code; 2127 2128 /* All versions have a destination register. 
The Tx where the texture coordinates come 2129 * from is the varying incarnation of the texture register 2130 */ 2131 reg_dest_code = dst->reg.idx[0].offset; 2132 shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg); 2133 shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg); 2134 sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code); 2135 2136 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 2137 * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register 2138 * 2139 * GL_NV_fragment_program_option could handle this in one instruction via X2D: 2140 * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw 2141 * 2142 * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that 2143 * comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV 2144 * extension. 2145 */ 2146 shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code); 2147 shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg); 2148 shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code); 2149 shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg); 2150 2151 /* with projective textures, texbem only divides the static texture coord, not the displacement, 2152 * so we can't let the GL handle this. 
2153 */ 2154 if ((priv->cur_ps_args->super.tex_transform >> reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT) 2155 & WINED3D_PSARGS_PROJECTED) 2156 { 2157 shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord); 2158 shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord); 2159 shader_addline(buffer, "ADD TA.xy, TA, TB;\n"); 2160 } else { 2161 shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord); 2162 } 2163 2164 shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", 0, NULL, NULL); 2165 2166 if (ins->handler_idx == WINED3DSIH_TEXBEML) 2167 { 2168 /* No src swizzles are allowed, so this is ok */ 2169 shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n", 2170 src_reg, reg_dest_code, reg_dest_code); 2171 shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg); 2172 } 2173 } 2174 2175 static void pshader_hw_texm3x2pad(const struct wined3d_shader_instruction *ins) 2176 { 2177 DWORD reg = ins->dst[0].reg.idx[0].offset; 2178 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2179 char src0_name[50], dst_name[50]; 2180 BOOL is_color; 2181 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 2182 2183 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2184 /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized 2185 * T<reg+1> register. 
Use this register to store the calculated vector 2186 */ 2187 tmp_reg.idx[0].offset = reg + 1; 2188 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 2189 shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 2190 } 2191 2192 static void pshader_hw_texm3x2tex(const struct wined3d_shader_instruction *ins) 2193 { 2194 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2195 DWORD flags; 2196 DWORD reg = ins->dst[0].reg.idx[0].offset; 2197 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2198 char dst_str[50]; 2199 char src0_name[50]; 2200 char dst_reg[50]; 2201 BOOL is_color; 2202 2203 /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */ 2204 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2205 2206 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2207 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2208 shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2209 flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2210 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2211 } 2212 2213 static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins) 2214 { 2215 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2216 DWORD reg = ins->dst[0].reg.idx[0].offset; 2217 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2218 char src0_name[50], dst_name[50]; 2219 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 2220 BOOL is_color; 2221 2222 /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with 2223 * incrementing ins->dst[0].register_idx numbers. 
So the pad instruction already knows the final destination 2224 * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared') 2225 */ 2226 tmp_reg.idx[0].offset = reg + 2 - tex_mx->current_row; 2227 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 2228 2229 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2230 shader_addline(buffer, "DP3 %s.%c, fragment.texcoord[%u], %s;\n", 2231 dst_name, 'x' + tex_mx->current_row, reg, src0_name); 2232 tex_mx->texcoord_w[tex_mx->current_row++] = reg; 2233 } 2234 2235 static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins) 2236 { 2237 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2238 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2239 DWORD flags; 2240 DWORD reg = ins->dst[0].reg.idx[0].offset; 2241 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2242 char dst_str[50]; 2243 char src0_name[50], dst_name[50]; 2244 BOOL is_color; 2245 2246 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2247 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2248 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 2249 2250 /* Sample the texture using the calculated coordinates */ 2251 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2252 flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2253 shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3D_PSARGS_PROJECTED ? 
TEX_PROJ : 0, NULL, NULL); 2254 tex_mx->current_row = 0; 2255 } 2256 2257 static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins) 2258 { 2259 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2260 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2261 DWORD flags; 2262 DWORD reg = ins->dst[0].reg.idx[0].offset; 2263 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2264 char dst_str[50]; 2265 char src0_name[50]; 2266 char dst_reg[50]; 2267 BOOL is_color; 2268 2269 /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all 2270 * components for temporary data storage 2271 */ 2272 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2273 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2274 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2275 2276 /* Construct the eye-ray vector from w coordinates */ 2277 shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[0]); 2278 shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[1]); 2279 shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg); 2280 2281 /* Calculate reflection vector 2282 */ 2283 shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg); 2284 /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */ 2285 shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg); 2286 shader_addline(buffer, "RCP TB.w, TB.w;\n"); 2287 shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg); 2288 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 2289 shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg); 2290 2291 /* Sample the texture using the calculated coordinates */ 2292 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2293 flags = reg < MAX_TEXTURES ? 
priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2294 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2295 tex_mx->current_row = 0; 2296 } 2297 2298 static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins) 2299 { 2300 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2301 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2302 DWORD flags; 2303 DWORD reg = ins->dst[0].reg.idx[0].offset; 2304 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2305 char dst_str[50]; 2306 char src0_name[50]; 2307 char src1_name[50]; 2308 char dst_reg[50]; 2309 BOOL is_color; 2310 2311 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2312 shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name); 2313 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2314 /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */ 2315 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2316 2317 /* Calculate reflection vector. 2318 * 2319 * dot(N, E) 2320 * dst_reg.xyz = 2 * --------- * N - E 2321 * dot(N, N) 2322 * 2323 * Which normalizes the normal vector 2324 */ 2325 shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name); 2326 shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg); 2327 shader_addline(buffer, "RCP TC.w, TC.w;\n"); 2328 shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg); 2329 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 2330 shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name); 2331 2332 /* Sample the texture using the calculated coordinates */ 2333 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2334 flags = reg < MAX_TEXTURES ? 
priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2335 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2336 tex_mx->current_row = 0; 2337 } 2338 2339 static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins) 2340 { 2341 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2342 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2343 char dst_name[50]; 2344 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2345 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2346 2347 /* texdepth has an implicit destination, the fragment depth value. It's only parameter, 2348 * which is essentially an input, is the destination register because it is the first 2349 * parameter. According to the msdn, this must be register r5, but let's keep it more flexible 2350 * here(writemasks/swizzles are not valid on texdepth) 2351 */ 2352 shader_arb_get_dst_param(ins, dst, dst_name); 2353 2354 /* According to the msdn, the source register(must be r5) is unusable after 2355 * the texdepth instruction, so we're free to modify it 2356 */ 2357 shader_addline(buffer, "MIN %s.y, %s.y, %s;\n", dst_name, dst_name, one); 2358 2359 /* How to deal with the special case dst_name.g == 0? if r != 0, then 2360 * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct 2361 * result. But if r = 0.0, then 0 * inf = 0, which is incorrect. 
2362 */ 2363 shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name); 2364 shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name); 2365 shader_addline(buffer, "MIN TA.x, TA.x, %s;\n", one); 2366 shader_addline(buffer, "MAX result.depth, TA.x, %s;\n", zero); 2367 } 2368 2369 /** Process the WINED3DSIO_TEXDP3TEX instruction in ARB: 2370 * Take a 3-component dot product of the TexCoord[dstreg] and src, 2371 * then perform a 1D texture lookup from stage dstregnum, place into dst. */ 2372 static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins) 2373 { 2374 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2375 DWORD sampler_idx = ins->dst[0].reg.idx[0].offset; 2376 char src0[50]; 2377 char dst_str[50]; 2378 2379 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2380 shader_addline(buffer, "MOV TB, 0.0;\n"); 2381 shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0); 2382 2383 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2384 shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL); 2385 } 2386 2387 /** Process the WINED3DSIO_TEXDP3 instruction in ARB: 2388 * Take a 3-component dot product of the TexCoord[dstreg] and src. 
*/ 2389 static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins) 2390 { 2391 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2392 char src0[50]; 2393 char dst_str[50]; 2394 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2395 2396 /* Handle output register */ 2397 shader_arb_get_dst_param(ins, dst, dst_str); 2398 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2399 shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx[0].offset, src0); 2400 } 2401 2402 /** Process the WINED3DSIO_TEXM3X3 instruction in ARB 2403 * Perform the 3rd row of a 3x3 matrix multiply */ 2404 static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins) 2405 { 2406 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2407 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2408 char dst_str[50], dst_name[50]; 2409 char src0[50]; 2410 BOOL is_color; 2411 2412 shader_arb_get_dst_param(ins, dst, dst_str); 2413 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2414 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2415 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0); 2416 shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name); 2417 } 2418 2419 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB: 2420 * Last row of a 3x2 matrix multiply, use the result to calculate the depth: 2421 * Calculate tmp0.y = TexCoord[dstreg] . src.xyz; (tmp0.x has already been calculated) 2422 * depth = (tmp0.y == 0.0) ? 
1.0 : tmp0.x / tmp0.y 2423 */ 2424 static void pshader_hw_texm3x2depth(const struct wined3d_shader_instruction *ins) 2425 { 2426 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2427 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2428 char src0[50], dst_name[50]; 2429 BOOL is_color; 2430 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2431 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2432 2433 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2434 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2435 shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0); 2436 2437 /* How to deal with the special case dst_name.g == 0? if r != 0, then 2438 * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct 2439 * result. But if r = 0.0, then 0 * inf = 0, which is incorrect. 2440 */ 2441 shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name); 2442 shader_addline(buffer, "MUL %s.x, %s.x, %s.y;\n", dst_name, dst_name, dst_name); 2443 shader_addline(buffer, "MIN %s.x, %s.x, %s;\n", dst_name, dst_name, one); 2444 shader_addline(buffer, "MAX result.depth, %s.x, %s;\n", dst_name, zero); 2445 } 2446 2447 /** Handles transforming all WINED3DSIO_M?x? 
opcodes for 2448 Vertex/Pixel shaders to ARB_vertex_program codes */ 2449 static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins) 2450 { 2451 int i; 2452 int nComponents = 0; 2453 struct wined3d_shader_dst_param tmp_dst = {{0}}; 2454 struct wined3d_shader_src_param tmp_src[2] = {{{0}}}; 2455 struct wined3d_shader_instruction tmp_ins; 2456 2457 memset(&tmp_ins, 0, sizeof(tmp_ins)); 2458 2459 /* Set constants for the temporary argument */ 2460 tmp_ins.ctx = ins->ctx; 2461 tmp_ins.dst_count = 1; 2462 tmp_ins.dst = &tmp_dst; 2463 tmp_ins.src_count = 2; 2464 tmp_ins.src = tmp_src; 2465 2466 switch(ins->handler_idx) 2467 { 2468 case WINED3DSIH_M4x4: 2469 nComponents = 4; 2470 tmp_ins.handler_idx = WINED3DSIH_DP4; 2471 break; 2472 case WINED3DSIH_M4x3: 2473 nComponents = 3; 2474 tmp_ins.handler_idx = WINED3DSIH_DP4; 2475 break; 2476 case WINED3DSIH_M3x4: 2477 nComponents = 4; 2478 tmp_ins.handler_idx = WINED3DSIH_DP3; 2479 break; 2480 case WINED3DSIH_M3x3: 2481 nComponents = 3; 2482 tmp_ins.handler_idx = WINED3DSIH_DP3; 2483 break; 2484 case WINED3DSIH_M3x2: 2485 nComponents = 2; 2486 tmp_ins.handler_idx = WINED3DSIH_DP3; 2487 break; 2488 default: 2489 FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 2490 break; 2491 } 2492 2493 tmp_dst = ins->dst[0]; 2494 tmp_src[0] = ins->src[0]; 2495 tmp_src[1] = ins->src[1]; 2496 for (i = 0; i < nComponents; ++i) 2497 { 2498 tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i; 2499 shader_hw_map2gl(&tmp_ins); 2500 ++tmp_src[1].reg.idx[0].offset; 2501 } 2502 } 2503 2504 static DWORD abs_modifier(DWORD mod, BOOL *need_abs) 2505 { 2506 *need_abs = FALSE; 2507 2508 switch(mod) 2509 { 2510 case WINED3DSPSM_NONE: return WINED3DSPSM_ABS; 2511 case WINED3DSPSM_NEG: return WINED3DSPSM_ABS; 2512 case WINED3DSPSM_BIAS: *need_abs = TRUE; return WINED3DSPSM_BIAS; 2513 case WINED3DSPSM_BIASNEG: *need_abs = TRUE; return WINED3DSPSM_BIASNEG; 2514 case WINED3DSPSM_SIGN: *need_abs = TRUE; return 
WINED3DSPSM_SIGN; 2515 case WINED3DSPSM_SIGNNEG: *need_abs = TRUE; return WINED3DSPSM_SIGNNEG; 2516 case WINED3DSPSM_COMP: *need_abs = TRUE; return WINED3DSPSM_COMP; 2517 case WINED3DSPSM_X2: *need_abs = TRUE; return WINED3DSPSM_X2; 2518 case WINED3DSPSM_X2NEG: *need_abs = TRUE; return WINED3DSPSM_X2NEG; 2519 case WINED3DSPSM_DZ: *need_abs = TRUE; return WINED3DSPSM_DZ; 2520 case WINED3DSPSM_DW: *need_abs = TRUE; return WINED3DSPSM_DW; 2521 case WINED3DSPSM_ABS: return WINED3DSPSM_ABS; 2522 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 2523 } 2524 FIXME("Unknown modifier %u\n", mod); 2525 return mod; 2526 } 2527 2528 static void shader_hw_scalar_op(const struct wined3d_shader_instruction *ins) 2529 { 2530 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2531 const char *instruction; 2532 struct wined3d_shader_src_param src0_copy = ins->src[0]; 2533 BOOL need_abs = FALSE; 2534 2535 char dst[50]; 2536 char src[50]; 2537 2538 switch(ins->handler_idx) 2539 { 2540 case WINED3DSIH_RSQ: instruction = "RSQ"; break; 2541 case WINED3DSIH_RCP: instruction = "RCP"; break; 2542 case WINED3DSIH_EXPP: 2543 if (ins->ctx->reg_maps->shader_version.major < 2) 2544 { 2545 instruction = "EXP"; 2546 break; 2547 } 2548 /* Drop through. */ 2549 case WINED3DSIH_EXP: 2550 instruction = "EX2"; 2551 break; 2552 case WINED3DSIH_LOG: 2553 case WINED3DSIH_LOGP: 2554 /* The precision requirements suggest that LOGP matches ARBvp's LOG 2555 * instruction, but notice that the output of those instructions is 2556 * different. */ 2557 src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs); 2558 instruction = "LG2"; 2559 break; 2560 default: instruction = ""; 2561 FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 2562 break; 2563 } 2564 2565 /* Dx sdk says .x is used if no swizzle is given, but our test shows that 2566 * .w is used. 
*/ 2567 src0_copy.swizzle = shader_arb_select_component(src0_copy.swizzle, 3); 2568 2569 shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */ 2570 shader_arb_get_src_param(ins, &src0_copy, 0, src); 2571 2572 if(need_abs) 2573 { 2574 shader_addline(buffer, "ABS TA.w, %s;\n", src); 2575 shader_addline(buffer, "%s%s %s, TA.w;\n", instruction, shader_arb_get_modifier(ins), dst); 2576 } 2577 else 2578 { 2579 shader_addline(buffer, "%s%s %s, %s;\n", instruction, shader_arb_get_modifier(ins), dst, src); 2580 } 2581 2582 } 2583 2584 static void shader_hw_nrm(const struct wined3d_shader_instruction *ins) 2585 { 2586 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2587 char dst_name[50]; 2588 char src_name[50]; 2589 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2590 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 2591 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2592 2593 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2594 shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name); 2595 2596 /* In D3D, NRM of a vector with length zero returns zero. 
Catch this situation, as 2597 * otherwise NRM or RSQ would return NaN */ 2598 if(pshader && priv->target_version >= NV3) 2599 { 2600 /* GL_NV_fragment_program2's NRM needs protection against length zero vectors too 2601 * 2602 * TODO: Find out if DP3+NRM+MOV is really faster than DP3+RSQ+MUL 2603 */ 2604 shader_addline(buffer, "DP3C TA, %s, %s;\n", src_name, src_name); 2605 shader_addline(buffer, "NRM%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name); 2606 shader_addline(buffer, "MOV %s (EQ), %s;\n", dst_name, zero); 2607 } 2608 else if(priv->target_version >= NV2) 2609 { 2610 shader_addline(buffer, "DP3C TA.x, %s, %s;\n", src_name, src_name); 2611 shader_addline(buffer, "RSQ TA.x (NE), TA.x;\n"); 2612 shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name, 2613 src_name); 2614 } 2615 else 2616 { 2617 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2618 2619 shader_addline(buffer, "DP3 TA.x, %s, %s;\n", src_name, src_name); 2620 /* Pass any non-zero value to RSQ if the input vector has a length of zero. The 2621 * RSQ result doesn't matter, as long as multiplying it by 0 returns 0. 
2622 */ 2623 shader_addline(buffer, "SGE TA.y, -TA.x, %s;\n", zero); 2624 shader_addline(buffer, "MAD TA.x, %s, TA.y, TA.x;\n", one); 2625 2626 shader_addline(buffer, "RSQ TA.x, TA.x;\n"); 2627 /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/ 2628 shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name, 2629 src_name); 2630 } 2631 } 2632 2633 static void shader_hw_lrp(const struct wined3d_shader_instruction *ins) 2634 { 2635 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2636 char dst_name[50]; 2637 char src_name[3][50]; 2638 2639 /* ARB_fragment_program has a convenient LRP instruction */ 2640 if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) { 2641 shader_hw_map2gl(ins); 2642 return; 2643 } 2644 2645 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2646 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 2647 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 2648 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 2649 2650 shader_addline(buffer, "SUB TA, %s, %s;\n", src_name[1], src_name[2]); 2651 shader_addline(buffer, "MAD%s %s, %s, TA, %s;\n", shader_arb_get_modifier(ins), 2652 dst_name, src_name[0], src_name[2]); 2653 } 2654 2655 static void shader_hw_sincos(const struct wined3d_shader_instruction *ins) 2656 { 2657 /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which 2658 * must contain fixed constants. 
So we need a separate function to filter those constants and 2659 * can't use map2gl 2660 */ 2661 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2662 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2663 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2664 char dst_name[50]; 2665 char src_name0[50], src_name1[50], src_name2[50]; 2666 BOOL is_color; 2667 2668 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 2669 if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) { 2670 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2671 /* No modifiers are supported on SCS */ 2672 shader_addline(buffer, "SCS %s, %s;\n", dst_name, src_name0); 2673 2674 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) 2675 { 2676 shader_arb_get_register_name(ins, &dst->reg, src_name0, &is_color); 2677 shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name0); 2678 } 2679 } else if(priv->target_version >= NV2) { 2680 shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color); 2681 2682 /* Sincos writemask must be .x, .y or .xy */ 2683 if(dst->write_mask & WINED3DSP_WRITEMASK_0) 2684 shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0); 2685 if(dst->write_mask & WINED3DSP_WRITEMASK_1) 2686 shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0); 2687 } else { 2688 /* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8 2689 * helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2. 2690 * 2691 * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... 2692 * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ... 
2693 * 2694 * The constants we get are: 2695 * 2696 * +1 +1, -1 -1 +1 +1 -1 -1 2697 * ---- , ---- , ---- , ----- , ----- , ----- , ------ 2698 * 1!*2 2!*4 3!*8 4!*16 5!*32 6!*64 7!*128 2699 * 2700 * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2): 2701 * 2702 * (x/2)^2 = x^2 / 4 2703 * (x/2)^3 = x^3 / 8 2704 * (x/2)^4 = x^4 / 16 2705 * (x/2)^5 = x^5 / 32 2706 * etc 2707 * 2708 * To get the final result: 2709 * sin(x) = 2 * sin(x/2) * cos(x/2) 2710 * cos(x) = cos(x/2)^2 - sin(x/2)^2 2711 * (from sin(x+y) and cos(x+y) rules) 2712 * 2713 * As per MSDN, dst.z is undefined after the operation, and so is 2714 * dst.x and dst.y if they're masked out by the writemask. Ie 2715 * sincos dst.y, src1, c0, c1 2716 * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler 2717 * vsa.exe also stops with an error if the dest register is the same register as the source 2718 * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also 2719 * indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0). 2720 */ 2721 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 2722 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2); 2723 shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color); 2724 2725 shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0); /* x ^ 2 */ 2726 shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0); /* x ^ 3 */ 2727 shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0); /* x ^ 4 */ 2728 shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0); /* x ^ 5 */ 2729 shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0); /* x ^ 6 */ 2730 shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0); /* x ^ 7 */ 2731 2732 /* sin(x/2) 2733 * 2734 * Unfortunately we don't get the constants in a DP4-capable form. 
Is there a way to 2735 * properly merge that with MULs in the code above? 2736 * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe 2737 * we can merge the sine and cosine MAD rows to calculate them together. 2738 */ 2739 shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */ 2740 shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */ 2741 shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */ 2742 shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */ 2743 2744 /* cos(x/2) */ 2745 shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */ 2746 shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */ 2747 shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */ 2748 2749 if(dst->write_mask & WINED3DSP_WRITEMASK_0) { 2750 /* cos x */ 2751 shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n"); 2752 shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name); 2753 } 2754 if(dst->write_mask & WINED3DSP_WRITEMASK_1) { 2755 /* sin x */ 2756 shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name); 2757 shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name); 2758 } 2759 } 2760 } 2761 2762 static void shader_hw_sgn(const struct wined3d_shader_instruction *ins) 2763 { 2764 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2765 char dst_name[50]; 2766 char src_name[50]; 2767 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 2768 2769 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2770 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name); 2771 2772 /* SGN is only valid in vertex shaders */ 2773 if(ctx->target_version >= NV2) { 2774 shader_addline(buffer, "SSG%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name); 2775 return; 
2776 } 2777 2778 /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false. 2779 * if SRC < 0.0, SRC < -SRC = TRUE. If neither is true, src = 0.0 2780 */ 2781 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) { 2782 shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name); 2783 } else { 2784 /* src contains TA? Write to the dest first. This won't overwrite our destination. 2785 * Then use TA, and calculate the final result 2786 * 2787 * Not reading from TA? Store the first result in TA to avoid overwriting the 2788 * destination if src reg = dst reg 2789 */ 2790 if(strstr(src_name, "TA")) 2791 { 2792 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 2793 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 2794 shader_addline(buffer, "ADD %s, %s, -TA;\n", dst_name, dst_name); 2795 } 2796 else 2797 { 2798 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 2799 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 2800 shader_addline(buffer, "ADD %s, TA, -%s;\n", dst_name, dst_name); 2801 } 2802 } 2803 } 2804 2805 static void shader_hw_dsy(const struct wined3d_shader_instruction *ins) 2806 { 2807 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2808 char src[50]; 2809 char dst[50]; 2810 char dst_name[50]; 2811 BOOL is_color; 2812 2813 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 2814 shader_arb_get_src_param(ins, &ins->src[0], 0, src); 2815 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2816 2817 shader_addline(buffer, "DDY %s, %s;\n", dst, src); 2818 shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst, dst_name); 2819 } 2820 2821 static void shader_hw_pow(const struct wined3d_shader_instruction *ins) 2822 { 2823 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2824 char src0[50], src1[50], dst[50]; 2825 struct wined3d_shader_src_param src0_copy = ins->src[0]; 2826 BOOL need_abs = FALSE; 2827 
struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2828 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2829 2830 /* POW operates on the absolute value of the input */ 2831 src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs); 2832 2833 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 2834 shader_arb_get_src_param(ins, &src0_copy, 0, src0); 2835 shader_arb_get_src_param(ins, &ins->src[1], 1, src1); 2836 2837 if (need_abs) 2838 shader_addline(buffer, "ABS TA.x, %s;\n", src0); 2839 else 2840 shader_addline(buffer, "MOV TA.x, %s;\n", src0); 2841 2842 if (priv->target_version >= NV2) 2843 { 2844 shader_addline(buffer, "MOVC TA.y, %s;\n", src1); 2845 shader_addline(buffer, "POW%s %s, TA.x, TA.y;\n", shader_arb_get_modifier(ins), dst); 2846 shader_addline(buffer, "MOV %s (EQ.y), %s;\n", dst, one); 2847 } 2848 else 2849 { 2850 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2851 const char *flt_eps = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_EPS); 2852 2853 shader_addline(buffer, "ABS TA.y, %s;\n", src1); 2854 shader_addline(buffer, "SGE TA.y, -TA.y, %s;\n", zero); 2855 /* Possibly add flt_eps to avoid getting float special values */ 2856 shader_addline(buffer, "MAD TA.z, TA.y, %s, %s;\n", flt_eps, src1); 2857 shader_addline(buffer, "POW%s TA.x, TA.x, TA.z;\n", shader_arb_get_modifier(ins)); 2858 shader_addline(buffer, "MAD TA.x, -TA.x, TA.y, TA.x;\n"); 2859 shader_addline(buffer, "MAD %s, TA.y, %s, TA.x;\n", dst, one); 2860 } 2861 } 2862 2863 static void shader_hw_loop(const struct wined3d_shader_instruction *ins) 2864 { 2865 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2866 char src_name[50]; 2867 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2868 2869 /* src0 is aL */ 2870 shader_arb_get_src_param(ins, &ins->src[1], 0, src_name); 2871 2872 if(vshader) 2873 { 2874 struct shader_arb_ctx_priv 
*priv = ins->ctx->backend_data; 2875 struct list *e = list_head(&priv->control_frames); 2876 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2877 2878 if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 2879 /* The constant loader makes sure to load -1 into iX.w */ 2880 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 2881 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 2882 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 2883 } 2884 else 2885 { 2886 shader_addline(buffer, "LOOP %s;\n", src_name); 2887 } 2888 } 2889 2890 static void shader_hw_rep(const struct wined3d_shader_instruction *ins) 2891 { 2892 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2893 char src_name[50]; 2894 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2895 2896 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name); 2897 2898 /* The constant loader makes sure to load -1 into iX.w */ 2899 if(vshader) 2900 { 2901 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2902 struct list *e = list_head(&priv->control_frames); 2903 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2904 2905 if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 2906 2907 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 2908 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 2909 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 2910 } 2911 else 2912 { 2913 shader_addline(buffer, "REP %s;\n", src_name); 2914 } 2915 } 2916 2917 static void shader_hw_endloop(const struct wined3d_shader_instruction *ins) 2918 { 2919 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2920 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2921 2922 if(vshader) 2923 { 2924 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2925 struct list *e = 
list_head(&priv->control_frames); 2926 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2927 2928 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 2929 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 2930 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 2931 2932 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 2933 } 2934 else 2935 { 2936 shader_addline(buffer, "ENDLOOP;\n"); 2937 } 2938 } 2939 2940 static void shader_hw_endrep(const struct wined3d_shader_instruction *ins) 2941 { 2942 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2943 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2944 2945 if(vshader) 2946 { 2947 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2948 struct list *e = list_head(&priv->control_frames); 2949 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2950 2951 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 2952 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 2953 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 2954 2955 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 2956 } 2957 else 2958 { 2959 shader_addline(buffer, "ENDREP;\n"); 2960 } 2961 } 2962 2963 static const struct control_frame *find_last_loop(const struct shader_arb_ctx_priv *priv) 2964 { 2965 struct control_frame *control_frame; 2966 2967 LIST_FOR_EACH_ENTRY(control_frame, &priv->control_frames, struct control_frame, entry) 2968 { 2969 if(control_frame->type == LOOP || control_frame->type == REP) return control_frame; 2970 } 2971 ERR("Could not find loop for break\n"); 2972 return NULL; 2973 } 2974 2975 static void shader_hw_break(const struct wined3d_shader_instruction *ins) 2976 { 2977 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2978 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 2979 BOOL 
vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2980 2981 if(vshader) 2982 { 2983 shader_addline(buffer, "BRA loop_%u_end;\n", control_frame->no.loop); 2984 } 2985 else 2986 { 2987 shader_addline(buffer, "BRK;\n"); 2988 } 2989 } 2990 2991 static const char *get_compare(enum wined3d_shader_rel_op op) 2992 { 2993 switch (op) 2994 { 2995 case WINED3D_SHADER_REL_OP_GT: return "GT"; 2996 case WINED3D_SHADER_REL_OP_EQ: return "EQ"; 2997 case WINED3D_SHADER_REL_OP_GE: return "GE"; 2998 case WINED3D_SHADER_REL_OP_LT: return "LT"; 2999 case WINED3D_SHADER_REL_OP_NE: return "NE"; 3000 case WINED3D_SHADER_REL_OP_LE: return "LE"; 3001 default: 3002 FIXME("Unrecognized operator %#x.\n", op); 3003 return "(\?\?)"; 3004 } 3005 } 3006 3007 static enum wined3d_shader_rel_op invert_compare(enum wined3d_shader_rel_op op) 3008 { 3009 switch (op) 3010 { 3011 case WINED3D_SHADER_REL_OP_GT: return WINED3D_SHADER_REL_OP_LE; 3012 case WINED3D_SHADER_REL_OP_EQ: return WINED3D_SHADER_REL_OP_NE; 3013 case WINED3D_SHADER_REL_OP_GE: return WINED3D_SHADER_REL_OP_LT; 3014 case WINED3D_SHADER_REL_OP_LT: return WINED3D_SHADER_REL_OP_GE; 3015 case WINED3D_SHADER_REL_OP_NE: return WINED3D_SHADER_REL_OP_EQ; 3016 case WINED3D_SHADER_REL_OP_LE: return WINED3D_SHADER_REL_OP_GT; 3017 default: 3018 FIXME("Unrecognized operator %#x.\n", op); 3019 return -1; 3020 } 3021 } 3022 3023 static void shader_hw_breakc(const struct wined3d_shader_instruction *ins) 3024 { 3025 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3026 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3027 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 3028 char src_name0[50]; 3029 char src_name1[50]; 3030 const char *comp = get_compare(ins->flags); 3031 3032 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 3033 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 3034 3035 if(vshader) 3036 { 3037 /* SUBC CC, src0, 
src1" works only in pixel shaders, so use TA to throw 3038 * away the subtraction result 3039 */ 3040 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3041 shader_addline(buffer, "BRA loop_%u_end (%s.x);\n", control_frame->no.loop, comp); 3042 } 3043 else 3044 { 3045 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3046 shader_addline(buffer, "BRK (%s.x);\n", comp); 3047 } 3048 } 3049 3050 static void shader_hw_ifc(const struct wined3d_shader_instruction *ins) 3051 { 3052 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3053 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3054 struct list *e = list_head(&priv->control_frames); 3055 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3056 const char *comp; 3057 char src_name0[50]; 3058 char src_name1[50]; 3059 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3060 3061 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 3062 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 3063 3064 if(vshader) 3065 { 3066 /* Invert the flag. 
We jump to the else label if the condition is NOT true */ 3067 comp = get_compare(invert_compare(ins->flags)); 3068 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3069 shader_addline(buffer, "BRA ifc_%u_else (%s.x);\n", control_frame->no.ifc, comp); 3070 } 3071 else 3072 { 3073 comp = get_compare(ins->flags); 3074 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3075 shader_addline(buffer, "IF %s.x;\n", comp); 3076 } 3077 } 3078 3079 static void shader_hw_else(const struct wined3d_shader_instruction *ins) 3080 { 3081 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3082 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3083 struct list *e = list_head(&priv->control_frames); 3084 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3085 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3086 3087 if(vshader) 3088 { 3089 shader_addline(buffer, "BRA ifc_%u_endif;\n", control_frame->no.ifc); 3090 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 3091 control_frame->had_else = TRUE; 3092 } 3093 else 3094 { 3095 shader_addline(buffer, "ELSE;\n"); 3096 } 3097 } 3098 3099 static void shader_hw_endif(const struct wined3d_shader_instruction *ins) 3100 { 3101 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3102 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3103 struct list *e = list_head(&priv->control_frames); 3104 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3105 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3106 3107 if(vshader) 3108 { 3109 if(control_frame->had_else) 3110 { 3111 shader_addline(buffer, "ifc_%u_endif:\n", control_frame->no.ifc); 3112 } 3113 else 3114 { 3115 shader_addline(buffer, "#No else branch. 
else is endif\n"); 3116 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 3117 } 3118 } 3119 else 3120 { 3121 shader_addline(buffer, "ENDIF;\n"); 3122 } 3123 } 3124 3125 static void shader_hw_texldd(const struct wined3d_shader_instruction *ins) 3126 { 3127 DWORD sampler_idx = ins->src[1].reg.idx[0].offset; 3128 char reg_dest[40]; 3129 char reg_src[3][40]; 3130 WORD flags = TEX_DERIV; 3131 3132 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 3133 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src[0]); 3134 shader_arb_get_src_param(ins, &ins->src[2], 1, reg_src[1]); 3135 shader_arb_get_src_param(ins, &ins->src[3], 2, reg_src[2]); 3136 3137 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 3138 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 3139 3140 shader_hw_sample(ins, sampler_idx, reg_dest, reg_src[0], flags, reg_src[1], reg_src[2]); 3141 } 3142 3143 static void shader_hw_texldl(const struct wined3d_shader_instruction *ins) 3144 { 3145 DWORD sampler_idx = ins->src[1].reg.idx[0].offset; 3146 char reg_dest[40]; 3147 char reg_coord[40]; 3148 WORD flags = TEX_LOD; 3149 3150 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 3151 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_coord); 3152 3153 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 3154 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 3155 3156 shader_hw_sample(ins, sampler_idx, reg_dest, reg_coord, flags, NULL, NULL); 3157 } 3158 3159 static void shader_hw_label(const struct wined3d_shader_instruction *ins) 3160 { 3161 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3162 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3163 3164 priv->in_main_func = FALSE; 3165 /* Call instructions activate the NV extensions, not labels and rets. 
If there is an uncalled 3166 * subroutine, don't generate a label that will make GL complain 3167 */ 3168 if(priv->target_version == ARB) return; 3169 3170 shader_addline(buffer, "l%u:\n", ins->src[0].reg.idx[0].offset); 3171 } 3172 3173 static void vshader_add_footer(struct shader_arb_ctx_priv *priv_ctx, 3174 const struct arb_vshader_private *shader_data, const struct arb_vs_compile_args *args, 3175 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info, 3176 struct wined3d_string_buffer *buffer) 3177 { 3178 unsigned int i; 3179 3180 /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used 3181 * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE), 3182 * the fog frag coord is thrown away. If the fog frag coord is used, but not written by 3183 * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0) 3184 */ 3185 if (args->super.fog_src == VS_FOG_Z) 3186 { 3187 shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n"); 3188 } 3189 else 3190 { 3191 if (!reg_maps->fog) 3192 { 3193 /* posFixup.x is always 1.0, so we can safely use it */ 3194 shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n"); 3195 } 3196 else 3197 { 3198 /* Clamp fogcoord */ 3199 const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO); 3200 const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE); 3201 3202 shader_addline(buffer, "MIN TMP_FOGCOORD.x, TMP_FOGCOORD.x, %s;\n", one); 3203 shader_addline(buffer, "MAX result.fogcoord.x, TMP_FOGCOORD.x, %s;\n", zero); 3204 } 3205 } 3206 3207 /* Clipplanes are always stored without y inversion */ 3208 if (use_nv_clip(gl_info) && priv_ctx->target_version >= NV2) 3209 { 3210 if (args->super.clip_enabled) 3211 { 3212 for (i = 0; i < priv_ctx->vs_clipplanes; i++) 3213 { 3214 shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i); 3215 } 3216 } 
3217 } 3218 else if (args->clip.boolclip.clip_texcoord) 3219 { 3220 static const char component[4] = {'x', 'y', 'z', 'w'}; 3221 unsigned int cur_clip = 0; 3222 const char *zero = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ZERO); 3223 3224 for (i = 0; i < gl_info->limits.user_clip_distances; ++i) 3225 { 3226 if (args->clip.boolclip.clipplane_mask & (1u << i)) 3227 { 3228 shader_addline(buffer, "DP4 TA.%c, TMP_OUT, state.clip[%u].plane;\n", 3229 component[cur_clip++], i); 3230 } 3231 } 3232 switch (cur_clip) 3233 { 3234 case 0: 3235 shader_addline(buffer, "MOV TA, %s;\n", zero); 3236 break; 3237 case 1: 3238 shader_addline(buffer, "MOV TA.yzw, %s;\n", zero); 3239 break; 3240 case 2: 3241 shader_addline(buffer, "MOV TA.zw, %s;\n", zero); 3242 break; 3243 case 3: 3244 shader_addline(buffer, "MOV TA.w, %s;\n", zero); 3245 break; 3246 } 3247 shader_addline(buffer, "MOV result.texcoord[%u], TA;\n", 3248 args->clip.boolclip.clip_texcoord - 1); 3249 } 3250 3251 /* Write the final position. 3252 * 3253 * OpenGL coordinates specify the center of the pixel while d3d coords specify 3254 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains 3255 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x 3256 * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that. 3257 */ 3258 if (!gl_info->supported[ARB_CLIP_CONTROL]) 3259 { 3260 shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n"); 3261 shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n"); 3262 shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n"); 3263 3264 /* Z coord [0;1]->[-1;1] mapping, see comment in 3265 * get_projection_matrix() in utils.c. 
*/ 3266 if (need_helper_const(shader_data, reg_maps, gl_info)) 3267 { 3268 const char *two = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_TWO); 3269 shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, %s, -TMP_OUT.w;\n", two); 3270 } 3271 else 3272 { 3273 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n"); 3274 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n"); 3275 } 3276 } 3277 3278 shader_addline(buffer, "MOV result.position, TMP_OUT;\n"); 3279 3280 priv_ctx->footer_written = TRUE; 3281 } 3282 3283 static void shader_hw_ret(const struct wined3d_shader_instruction *ins) 3284 { 3285 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3286 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3287 const struct wined3d_shader *shader = ins->ctx->shader; 3288 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3289 3290 if(priv->target_version == ARB) return; 3291 3292 if(vshader) 3293 { 3294 if (priv->in_main_func) vshader_add_footer(priv, shader->backend_data, 3295 priv->cur_vs_args, ins->ctx->reg_maps, ins->ctx->gl_info, buffer); 3296 } 3297 3298 shader_addline(buffer, "RET;\n"); 3299 } 3300 3301 static void shader_hw_call(const struct wined3d_shader_instruction *ins) 3302 { 3303 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3304 shader_addline(buffer, "CAL l%u;\n", ins->src[0].reg.idx[0].offset); 3305 } 3306 3307 static BOOL shader_arb_compile(const struct wined3d_gl_info *gl_info, GLenum target, const char *src) 3308 { 3309 const char *ptr, *line; 3310 GLint native, pos; 3311 3312 if (TRACE_ON(d3d_shader)) 3313 { 3314 ptr = src; 3315 while ((line = get_line(&ptr))) TRACE_(d3d_shader)(" %.*s", (int)(ptr - line), line); 3316 } 3317 3318 GL_EXTCALL(glProgramStringARB(target, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(src), src)); 3319 checkGLcall("glProgramStringARB()"); 3320 3321 if (FIXME_ON(d3d_shader)) 3322 { 3323 gl_info->gl_ops.gl.p_glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, 
&pos); 3324 if (pos != -1) 3325 { 3326 FIXME_(d3d_shader)("Program error at position %d: %s\n\n", pos, 3327 debugstr_a((const char *)gl_info->gl_ops.gl.p_glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 3328 ptr = src; 3329 while ((line = get_line(&ptr))) FIXME_(d3d_shader)(" %.*s", (int)(ptr - line), line); 3330 FIXME_(d3d_shader)("\n"); 3331 3332 return FALSE; 3333 } 3334 } 3335 3336 if (WARN_ON(d3d_perf)) 3337 { 3338 GL_EXTCALL(glGetProgramivARB(target, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 3339 checkGLcall("glGetProgramivARB()"); 3340 if (!native) 3341 WARN_(d3d_perf)("Program exceeds native resource limits.\n"); 3342 } 3343 3344 return TRUE; 3345 } 3346 3347 static void arbfp_add_sRGB_correction(struct wined3d_string_buffer *buffer, const char *fragcolor, 3348 const char *tmp1, const char *tmp2, const char *tmp3, const char *tmp4, BOOL condcode) 3349 { 3350 /* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */ 3351 3352 if(condcode) 3353 { 3354 /* Sigh. MOVC CC doesn't work, so use one of the temps as dummy dest */ 3355 shader_addline(buffer, "SUBC %s, %s.x, srgb_consts1.x;\n", tmp1, fragcolor); 3356 /* Calculate the > 0.0031308 case */ 3357 shader_addline(buffer, "POW %s.x (GE), %s.x, srgb_consts0.x;\n", fragcolor, fragcolor); 3358 shader_addline(buffer, "POW %s.y (GE), %s.y, srgb_consts0.x;\n", fragcolor, fragcolor); 3359 shader_addline(buffer, "POW %s.z (GE), %s.z, srgb_consts0.x;\n", fragcolor, fragcolor); 3360 shader_addline(buffer, "MUL %s.xyz (GE), %s, srgb_consts0.y;\n", fragcolor, fragcolor); 3361 shader_addline(buffer, "SUB %s.xyz (GE), %s, srgb_consts0.z;\n", fragcolor, fragcolor); 3362 /* Calculate the < case */ 3363 shader_addline(buffer, "MUL %s.xyz (LT), srgb_consts0.w, %s;\n", fragcolor, fragcolor); 3364 } 3365 else 3366 { 3367 /* Calculate the > 0.0031308 case */ 3368 shader_addline(buffer, "POW %s.x, %s.x, srgb_consts0.x;\n", tmp1, fragcolor); 3369 shader_addline(buffer, "POW %s.y, %s.y, srgb_consts0.x;\n", tmp1, fragcolor); 
3370 shader_addline(buffer, "POW %s.z, %s.z, srgb_consts0.x;\n", tmp1, fragcolor); 3371 shader_addline(buffer, "MUL %s, %s, srgb_consts0.y;\n", tmp1, tmp1); 3372 shader_addline(buffer, "SUB %s, %s, srgb_consts0.z;\n", tmp1, tmp1); 3373 /* Calculate the < case */ 3374 shader_addline(buffer, "MUL %s, srgb_consts0.w, %s;\n", tmp2, fragcolor); 3375 /* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */ 3376 shader_addline(buffer, "SLT %s, srgb_consts1.x, %s;\n", tmp3, fragcolor); 3377 shader_addline(buffer, "SGE %s, srgb_consts1.x, %s;\n", tmp4, fragcolor); 3378 /* Store the components > 0.0031308 in the destination */ 3379 shader_addline(buffer, "MUL %s.xyz, %s, %s;\n", fragcolor, tmp1, tmp3); 3380 /* Add the components that are < 0.0031308 */ 3381 shader_addline(buffer, "MAD %s.xyz, %s, %s, %s;\n", fragcolor, tmp2, tmp4, fragcolor); 3382 /* Move everything into result.color at once. Nvidia hardware cannot handle partial 3383 * result.color writes(.rgb first, then .a), or handle overwriting already written 3384 * components. The assembler uses a temporary register in this case, which is usually 3385 * not allocated from one of our registers that were used earlier. 3386 */ 3387 } 3388 /* [0.0;1.0] clamping. 
Not needed, this is done implicitly */ 3389 } 3390 3391 static const DWORD *find_loop_control_values(const struct wined3d_shader *shader, DWORD idx) 3392 { 3393 const struct wined3d_shader_lconst *constant; 3394 3395 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 3396 { 3397 if (constant->idx == idx) 3398 { 3399 return constant->value; 3400 } 3401 } 3402 return NULL; 3403 } 3404 3405 static void init_ps_input(const struct wined3d_shader *shader, 3406 const struct arb_ps_compile_args *args, struct shader_arb_ctx_priv *priv) 3407 { 3408 static const char * const texcoords[8] = 3409 { 3410 "fragment.texcoord[0]", "fragment.texcoord[1]", "fragment.texcoord[2]", "fragment.texcoord[3]", 3411 "fragment.texcoord[4]", "fragment.texcoord[5]", "fragment.texcoord[6]", "fragment.texcoord[7]" 3412 }; 3413 unsigned int i; 3414 const struct wined3d_shader_signature_element *input; 3415 const char *semantic_name; 3416 DWORD semantic_idx; 3417 3418 if (args->super.vp_mode == WINED3D_VP_MODE_SHADER) 3419 { 3420 /* That one is easy. The vertex shaders provide v0-v7 in 3421 * fragment.texcoord and v8 and v9 in fragment.color. */ 3422 for (i = 0; i < 8; ++i) 3423 { 3424 priv->ps_input[i] = texcoords[i]; 3425 } 3426 priv->ps_input[8] = "fragment.color.primary"; 3427 priv->ps_input[9] = "fragment.color.secondary"; 3428 return; 3429 } 3430 3431 /* The fragment shader has to collect the varyings on its own. In any case 3432 * properly load color0 and color1. In the case of pre-transformed 3433 * vertices also load texture coordinates. Set other attributes to 0.0. 3434 * 3435 * For fixed-function this behavior is correct, according to the tests. 3436 * For pre-transformed we'd either need a replacement shader that can load 3437 * other attributes like BINORMAL, or load the texture coordinate 3438 * attribute pointers to match the fragment shader signature. 
*/ 3439 for (i = 0; i < shader->input_signature.element_count; ++i) 3440 { 3441 input = &shader->input_signature.elements[i]; 3442 if (!(semantic_name = input->semantic_name)) 3443 continue; 3444 semantic_idx = input->semantic_idx; 3445 3446 if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR)) 3447 { 3448 if (!semantic_idx) 3449 priv->ps_input[input->register_idx] = "fragment.color.primary"; 3450 else if (semantic_idx == 1) 3451 priv->ps_input[input->register_idx] = "fragment.color.secondary"; 3452 else 3453 priv->ps_input[input->register_idx] = "0.0"; 3454 } 3455 else if (args->super.vp_mode == WINED3D_VP_MODE_FF) 3456 { 3457 priv->ps_input[input->register_idx] = "0.0"; 3458 } 3459 else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD)) 3460 { 3461 if (semantic_idx < 8) 3462 priv->ps_input[input->register_idx] = texcoords[semantic_idx]; 3463 else 3464 priv->ps_input[input->register_idx] = "0.0"; 3465 } 3466 else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_FOG)) 3467 { 3468 if (!semantic_idx) 3469 priv->ps_input[input->register_idx] = "fragment.fogcoord"; 3470 else 3471 priv->ps_input[input->register_idx] = "0.0"; 3472 } 3473 else 3474 { 3475 priv->ps_input[input->register_idx] = "0.0"; 3476 } 3477 3478 TRACE("v%u, semantic %s%u is %s\n", input->register_idx, 3479 semantic_name, semantic_idx, priv->ps_input[input->register_idx]); 3480 } 3481 } 3482 3483 static void arbfp_add_linear_fog(struct wined3d_string_buffer *buffer, 3484 const char *fragcolor, const char *tmp) 3485 { 3486 shader_addline(buffer, "SUB %s.x, state.fog.params.z, fragment.fogcoord.x;\n", tmp); 3487 shader_addline(buffer, "MUL_SAT %s.x, %s.x, state.fog.params.w;\n", tmp, tmp); 3488 shader_addline(buffer, "LRP %s.rgb, %s.x, %s, state.fog.color;\n", fragcolor, tmp, fragcolor); 3489 } 3490 3491 /* Context activation is done by the caller. 
*/ 3492 static GLuint shader_arb_generate_pshader(const struct wined3d_shader *shader, 3493 const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer, 3494 const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled) 3495 { 3496 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 3497 GLuint retval; 3498 char fragcolor[16]; 3499 DWORD next_local = 0; 3500 struct shader_arb_ctx_priv priv_ctx; 3501 BOOL dcl_td = FALSE; 3502 BOOL want_nv_prog = FALSE; 3503 struct arb_pshader_private *shader_priv = shader->backend_data; 3504 DWORD map; 3505 BOOL custom_linear_fog = FALSE; 3506 3507 char srgbtmp[4][4]; 3508 char ftoa_tmp[17]; 3509 unsigned int i, found = 0; 3510 3511 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 3512 { 3513 if (!(map & 1) 3514 || (shader->u.ps.color0_mov && i == shader->u.ps.color0_reg) 3515 || (reg_maps->shader_version.major < 2 && !i)) 3516 continue; 3517 3518 sprintf(srgbtmp[found], "R%u", i); 3519 ++found; 3520 if (found == 4) break; 3521 } 3522 3523 switch(found) { 3524 case 0: 3525 sprintf(srgbtmp[0], "TA"); 3526 sprintf(srgbtmp[1], "TB"); 3527 sprintf(srgbtmp[2], "TC"); 3528 sprintf(srgbtmp[3], "TD"); 3529 dcl_td = TRUE; 3530 break; 3531 case 1: 3532 sprintf(srgbtmp[1], "TA"); 3533 sprintf(srgbtmp[2], "TB"); 3534 sprintf(srgbtmp[3], "TC"); 3535 break; 3536 case 2: 3537 sprintf(srgbtmp[2], "TA"); 3538 sprintf(srgbtmp[3], "TB"); 3539 break; 3540 case 3: 3541 sprintf(srgbtmp[3], "TA"); 3542 break; 3543 case 4: 3544 break; 3545 } 3546 3547 /* Create the hw ARB shader */ 3548 memset(&priv_ctx, 0, sizeof(priv_ctx)); 3549 priv_ctx.cur_ps_args = args; 3550 priv_ctx.compiled_fprog = compiled; 3551 priv_ctx.cur_np2fixup_info = &compiled->np2fixup_info; 3552 init_ps_input(shader, args, &priv_ctx); 3553 list_init(&priv_ctx.control_frames); 3554 priv_ctx.ps_post_process = args->super.srgb_correction; 3555 3556 /* Avoid enabling NV_fragment_program* if we do not need it. 
3557 * 3558 * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register, 3559 * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation 3560 * is faster than what we gain from using higher native instructions. There are some things though 3561 * that cannot be emulated. In that case enable the extensions. 3562 * If the extension is enabled, instruction handlers that support both ways will use it. 3563 * 3564 * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program. 3565 * So enable the best we can get. 3566 */ 3567 if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0 || reg_maps->usestexldd || 3568 reg_maps->usestexldl || reg_maps->usesfacing || reg_maps->usesifc || reg_maps->usescall) 3569 { 3570 want_nv_prog = TRUE; 3571 } 3572 3573 shader_addline(buffer, "!!ARBfp1.0\n"); 3574 if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM2]) 3575 { 3576 shader_addline(buffer, "OPTION NV_fragment_program2;\n"); 3577 priv_ctx.target_version = NV3; 3578 } 3579 else if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]) 3580 { 3581 shader_addline(buffer, "OPTION NV_fragment_program;\n"); 3582 priv_ctx.target_version = NV2; 3583 } else { 3584 if(want_nv_prog) 3585 { 3586 /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some 3587 * limits properly 3588 */ 3589 ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n"); 3590 ERR("Try GLSL\n"); 3591 } 3592 priv_ctx.target_version = ARB; 3593 } 3594 3595 if (reg_maps->rt_mask > 1) 3596 { 3597 shader_addline(buffer, "OPTION ARB_draw_buffers;\n"); 3598 } 3599 3600 if (reg_maps->shader_version.major < 3) 3601 { 3602 switch (args->super.fog) 3603 { 3604 case WINED3D_FFP_PS_FOG_OFF: 3605 break; 3606 case WINED3D_FFP_PS_FOG_LINEAR: 3607 if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG) 3608 { 3609 
custom_linear_fog = TRUE; 3610 priv_ctx.ps_post_process = TRUE; 3611 break; 3612 } 3613 shader_addline(buffer, "OPTION ARB_fog_linear;\n"); 3614 break; 3615 case WINED3D_FFP_PS_FOG_EXP: 3616 shader_addline(buffer, "OPTION ARB_fog_exp;\n"); 3617 break; 3618 case WINED3D_FFP_PS_FOG_EXP2: 3619 shader_addline(buffer, "OPTION ARB_fog_exp2;\n"); 3620 break; 3621 } 3622 } 3623 3624 /* For now always declare the temps. At least the Nvidia assembler optimizes completely 3625 * unused temps away(but occupies them for the whole shader if they're used once). Always 3626 * declaring them avoids tricky bookkeeping work 3627 */ 3628 shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */ 3629 shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */ 3630 shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */ 3631 if(dcl_td) shader_addline(buffer, "TEMP TD;\n"); /* Used for sRGB writing */ 3632 shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n"); 3633 shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n"); 3634 wined3d_ftoa(eps, ftoa_tmp); 3635 shader_addline(buffer, "PARAM ps_helper_const = { 0.0, 1.0, %s, 0.0 };\n", ftoa_tmp); 3636 3637 if (reg_maps->shader_version.major < 2) 3638 { 3639 strcpy(fragcolor, "R0"); 3640 } 3641 else 3642 { 3643 if (priv_ctx.ps_post_process) 3644 { 3645 if (shader->u.ps.color0_mov) 3646 { 3647 sprintf(fragcolor, "R%u", shader->u.ps.color0_reg); 3648 } 3649 else 3650 { 3651 shader_addline(buffer, "TEMP TMP_COLOR;\n"); 3652 strcpy(fragcolor, "TMP_COLOR"); 3653 } 3654 } else { 3655 strcpy(fragcolor, "result.color"); 3656 } 3657 } 3658 3659 if (args->super.srgb_correction) 3660 { 3661 shader_addline(buffer, "PARAM srgb_consts0 = "); 3662 shader_arb_append_imm_vec4(buffer, wined3d_srgb_const0); 3663 shader_addline(buffer, ";\n"); 3664 shader_addline(buffer, "PARAM srgb_consts1 = "); 3665 shader_arb_append_imm_vec4(buffer, wined3d_srgb_const1); 3666 shader_addline(buffer, ";\n"); 3667 } 3668 3669 /* Base 
Declarations */ 3670 shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info, NULL, &priv_ctx); 3671 3672 for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i) 3673 { 3674 unsigned char bump_const; 3675 3676 if (!(map & 1)) continue; 3677 3678 bump_const = compiled->numbumpenvmatconsts; 3679 compiled->bumpenvmatconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED; 3680 compiled->bumpenvmatconst[bump_const].texunit = i; 3681 compiled->luminanceconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED; 3682 compiled->luminanceconst[bump_const].texunit = i; 3683 3684 /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported 3685 * in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading 3686 * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped 3687 * textures due to conditional NP2 restrictions) 3688 * 3689 * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of 3690 * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants, 3691 * their location is shader dependent anyway and they cannot be loaded globally. 
3692 */ 3693 compiled->bumpenvmatconst[bump_const].const_num = next_local++; 3694 shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n", 3695 i, compiled->bumpenvmatconst[bump_const].const_num); 3696 compiled->numbumpenvmatconsts = bump_const + 1; 3697 3698 if (!(reg_maps->luminanceparams & (1u << i))) 3699 continue; 3700 3701 compiled->luminanceconst[bump_const].const_num = next_local++; 3702 shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n", 3703 i, compiled->luminanceconst[bump_const].const_num); 3704 } 3705 3706 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 3707 { 3708 compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED; 3709 if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2) 3710 { 3711 const DWORD *control_values = find_loop_control_values(shader, i); 3712 3713 if(control_values) 3714 { 3715 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i, 3716 control_values[0], control_values[1], control_values[2]); 3717 } 3718 else 3719 { 3720 compiled->int_consts[i] = next_local; 3721 compiled->num_int_consts++; 3722 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++); 3723 } 3724 } 3725 } 3726 3727 if(reg_maps->vpos || reg_maps->usesdsy) 3728 { 3729 compiled->ycorrection = next_local; 3730 shader_addline(buffer, "PARAM ycorrection = program.local[%u];\n", next_local++); 3731 3732 if(reg_maps->vpos) 3733 { 3734 shader_addline(buffer, "TEMP vpos;\n"); 3735 /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen). 
3736 * ycorrection.y: -1.0(onscreen), 1.0(offscreen) 3737 * ycorrection.z: 1.0 3738 * ycorrection.w: 0.0 3739 */ 3740 shader_addline(buffer, "MAD vpos, fragment.position, ycorrection.zyww, ycorrection.wxww;\n"); 3741 shader_addline(buffer, "FLR vpos.xy, vpos;\n"); 3742 } 3743 } 3744 else 3745 { 3746 compiled->ycorrection = WINED3D_CONST_NUM_UNUSED; 3747 } 3748 3749 /* Load constants to fixup NP2 texcoords if there are still free constants left: 3750 * Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume 3751 * at most 8 (MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to 3752 * use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not 3753 * applied / activated. This will probably result in wrong rendering of the texture, but will save us from 3754 * shader compilation errors and the subsequent errors when drawing with this shader. */ 3755 if (priv_ctx.cur_ps_args->super.np2_fixup) { 3756 unsigned char cur_fixup_sampler = 0; 3757 3758 struct arb_ps_np2fixup_info* const fixup = priv_ctx.cur_np2fixup_info; 3759 const WORD map = priv_ctx.cur_ps_args->super.np2_fixup; 3760 const UINT max_lconsts = gl_info->limits.arb_ps_local_constants; 3761 3762 fixup->offset = next_local; 3763 fixup->super.active = 0; 3764 3765 for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) 3766 { 3767 if (!(map & (1u << i))) 3768 continue; 3769 3770 if (fixup->offset + (cur_fixup_sampler >> 1) < max_lconsts) 3771 { 3772 fixup->super.active |= (1u << i); 3773 fixup->super.idx[i] = cur_fixup_sampler++; 3774 } 3775 else 3776 { 3777 FIXME("No free constant found to load NP2 fixup data into shader. 
" 3778 "Sampling from this texture will probably look wrong.\n"); 3779 break; 3780 } 3781 } 3782 3783 fixup->super.num_consts = (cur_fixup_sampler + 1) >> 1; 3784 if (fixup->super.num_consts) { 3785 shader_addline(buffer, "PARAM np2fixup[%u] = { program.env[%u..%u] };\n", 3786 fixup->super.num_consts, fixup->offset, fixup->super.num_consts + fixup->offset - 1); 3787 } 3788 } 3789 3790 if (shader_priv->clipplane_emulation != ~0U && args->clip) 3791 { 3792 shader_addline(buffer, "KIL fragment.texcoord[%u];\n", shader_priv->clipplane_emulation); 3793 } 3794 3795 /* Base Shader Body */ 3796 if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL))) 3797 return 0; 3798 3799 if(args->super.srgb_correction) { 3800 arbfp_add_sRGB_correction(buffer, fragcolor, srgbtmp[0], srgbtmp[1], srgbtmp[2], srgbtmp[3], 3801 priv_ctx.target_version >= NV2); 3802 } 3803 3804 if (custom_linear_fog) 3805 arbfp_add_linear_fog(buffer, fragcolor, "TA"); 3806 3807 if(strcmp(fragcolor, "result.color")) { 3808 shader_addline(buffer, "MOV result.color, %s;\n", fragcolor); 3809 } 3810 shader_addline(buffer, "END\n"); 3811 3812 /* TODO: change to resource.glObjectHandle or something like that */ 3813 GL_EXTCALL(glGenProgramsARB(1, &retval)); 3814 3815 TRACE("Creating a hw pixel shader, prg=%d\n", retval); 3816 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval)); 3817 3818 TRACE("Created hw pixel shader, prg=%d\n", retval); 3819 if (!shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer->buffer)) 3820 return 0; 3821 3822 return retval; 3823 } 3824 3825 static int compare_sig(const struct wined3d_shader_signature *sig1, const struct wined3d_shader_signature *sig2) 3826 { 3827 unsigned int i; 3828 int ret; 3829 3830 if (sig1->element_count != sig2->element_count) 3831 return sig1->element_count < sig2->element_count ? 
-1 : 1; 3832 3833 for (i = 0; i < sig1->element_count; ++i) 3834 { 3835 const struct wined3d_shader_signature_element *e1, *e2; 3836 3837 e1 = &sig1->elements[i]; 3838 e2 = &sig2->elements[i]; 3839 3840 if (!e1->semantic_name || !e2->semantic_name) 3841 { 3842 /* Compare pointers, not contents. One string is NULL (element 3843 * does not exist), the other one is not NULL. */ 3844 if (e1->semantic_name != e2->semantic_name) 3845 return e1->semantic_name < e2->semantic_name ? -1 : 1; 3846 continue; 3847 } 3848 3849 if ((ret = strcmp(e1->semantic_name, e2->semantic_name))) 3850 return ret; 3851 if (e1->semantic_idx != e2->semantic_idx) 3852 return e1->semantic_idx < e2->semantic_idx ? -1 : 1; 3853 if (e1->sysval_semantic != e2->sysval_semantic) 3854 return e1->sysval_semantic < e2->sysval_semantic ? -1 : 1; 3855 if (e1->component_type != e2->component_type) 3856 return e1->component_type < e2->component_type ? -1 : 1; 3857 if (e1->register_idx != e2->register_idx) 3858 return e1->register_idx < e2->register_idx ? -1 : 1; 3859 if (e1->mask != e2->mask) 3860 return e1->mask < e2->mask ? 
-1 : 1; 3861 } 3862 return 0; 3863 } 3864 3865 static void clone_sig(struct wined3d_shader_signature *new, const struct wined3d_shader_signature *sig) 3866 { 3867 unsigned int i; 3868 char *name; 3869 3870 new->element_count = sig->element_count; 3871 new->elements = heap_calloc(new->element_count, sizeof(*new->elements)); 3872 for (i = 0; i < sig->element_count; ++i) 3873 { 3874 new->elements[i] = sig->elements[i]; 3875 3876 if (!new->elements[i].semantic_name) 3877 continue; 3878 3879 /* Clone the semantic string */ 3880 name = heap_alloc(strlen(sig->elements[i].semantic_name) + 1); 3881 strcpy(name, sig->elements[i].semantic_name); 3882 new->elements[i].semantic_name = name; 3883 } 3884 } 3885 3886 static DWORD find_input_signature(struct shader_arb_priv *priv, const struct wined3d_shader_signature *sig) 3887 { 3888 struct wine_rb_entry *entry = wine_rb_get(&priv->signature_tree, sig); 3889 struct ps_signature *found_sig; 3890 3891 if (entry) 3892 { 3893 found_sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 3894 TRACE("Found existing signature %u\n", found_sig->idx); 3895 return found_sig->idx; 3896 } 3897 found_sig = heap_alloc_zero(sizeof(*found_sig)); 3898 clone_sig(&found_sig->sig, sig); 3899 found_sig->idx = priv->ps_sig_number++; 3900 TRACE("New signature stored and assigned number %u\n", found_sig->idx); 3901 if(wine_rb_put(&priv->signature_tree, sig, &found_sig->entry) == -1) 3902 { 3903 ERR("Failed to insert program entry.\n"); 3904 } 3905 return found_sig->idx; 3906 } 3907 3908 static void init_output_registers(const struct wined3d_shader *shader, 3909 const struct wined3d_shader_signature *ps_input_sig, 3910 struct shader_arb_ctx_priv *priv_ctx, struct arb_vs_compiled_shader *compiled) 3911 { 3912 unsigned int i, j; 3913 static const char * const texcoords[8] = 3914 { 3915 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 3916 "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", 
"result.texcoord[7]" 3917 }; 3918 /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary 3919 * and varying 9 to result.color.secondary 3920 */ 3921 static const char * const decl_idx_to_string[MAX_REG_INPUT] = 3922 { 3923 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 3924 "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]", 3925 "result.color.primary", "result.color.secondary" 3926 }; 3927 3928 if (!ps_input_sig) 3929 { 3930 TRACE("Pixel shader uses builtin varyings\n"); 3931 /* Map builtins to builtins */ 3932 for(i = 0; i < 8; i++) 3933 { 3934 priv_ctx->texcrd_output[i] = texcoords[i]; 3935 } 3936 priv_ctx->color_output[0] = "result.color.primary"; 3937 priv_ctx->color_output[1] = "result.color.secondary"; 3938 priv_ctx->fog_output = "TMP_FOGCOORD"; 3939 3940 /* Map declared regs to builtins. Use "TA" to /dev/null unread output */ 3941 for (i = 0; i < shader->output_signature.element_count; ++i) 3942 { 3943 const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i]; 3944 3945 if (!output->semantic_name) 3946 continue; 3947 3948 if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION)) 3949 { 3950 TRACE("o%u is TMP_OUT\n", output->register_idx); 3951 if (!output->semantic_idx) 3952 priv_ctx->vs_output[output->register_idx] = "TMP_OUT"; 3953 else 3954 priv_ctx->vs_output[output->register_idx] = "TA"; 3955 } 3956 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE)) 3957 { 3958 TRACE("o%u is result.pointsize\n", output->register_idx); 3959 if (!output->semantic_idx) 3960 priv_ctx->vs_output[output->register_idx] = "result.pointsize"; 3961 else 3962 priv_ctx->vs_output[output->register_idx] = "TA"; 3963 } 3964 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_COLOR)) 3965 { 3966 TRACE("o%u is result.color.?, idx %u\n", output->register_idx, 
output->semantic_idx); 3967 if (!output->semantic_idx) 3968 priv_ctx->vs_output[output->register_idx] = "result.color.primary"; 3969 else if (output->semantic_idx == 1) 3970 priv_ctx->vs_output[output->register_idx] = "result.color.secondary"; 3971 else priv_ctx->vs_output[output->register_idx] = "TA"; 3972 } 3973 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_TEXCOORD)) 3974 { 3975 TRACE("o%u is result.texcoord[%u]\n", output->register_idx, output->semantic_idx); 3976 if (output->semantic_idx >= 8) 3977 priv_ctx->vs_output[output->register_idx] = "TA"; 3978 else 3979 priv_ctx->vs_output[output->register_idx] = texcoords[output->semantic_idx]; 3980 } 3981 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_FOG)) 3982 { 3983 TRACE("o%u is result.fogcoord\n", output->register_idx); 3984 if (output->semantic_idx > 0) 3985 priv_ctx->vs_output[output->register_idx] = "TA"; 3986 else 3987 priv_ctx->vs_output[output->register_idx] = "result.fogcoord"; 3988 } 3989 else 3990 { 3991 priv_ctx->vs_output[output->register_idx] = "TA"; 3992 } 3993 } 3994 return; 3995 } 3996 3997 TRACE("Pixel shader uses declared varyings\n"); 3998 3999 /* Map builtin to declared. /dev/null the results by default to the TA temp reg */ 4000 for(i = 0; i < 8; i++) 4001 { 4002 priv_ctx->texcrd_output[i] = "TA"; 4003 } 4004 priv_ctx->color_output[0] = "TA"; 4005 priv_ctx->color_output[1] = "TA"; 4006 priv_ctx->fog_output = "TA"; 4007 4008 for (i = 0; i < ps_input_sig->element_count; ++i) 4009 { 4010 const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[i]; 4011 4012 if (!input->semantic_name) 4013 continue; 4014 4015 /* If a declared input register is not written by builtin arguments, don't write to it. 
4016 * GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D 4017 * 4018 * Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes 4019 * to TMP_OUT in any case 4020 */ 4021 if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_TEXCOORD)) 4022 { 4023 if (input->semantic_idx < 8) 4024 priv_ctx->texcrd_output[input->semantic_idx] = decl_idx_to_string[input->register_idx]; 4025 } 4026 else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_COLOR)) 4027 { 4028 if (input->semantic_idx < 2) 4029 priv_ctx->color_output[input->semantic_idx] = decl_idx_to_string[input->register_idx]; 4030 } 4031 else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_FOG)) 4032 { 4033 if (!input->semantic_idx) 4034 priv_ctx->fog_output = decl_idx_to_string[input->register_idx]; 4035 } 4036 else 4037 { 4038 continue; 4039 } 4040 4041 if (!strcmp(decl_idx_to_string[input->register_idx], "result.color.primary") 4042 || !strcmp(decl_idx_to_string[input->register_idx], "result.color.secondary")) 4043 { 4044 compiled->need_color_unclamp = TRUE; 4045 } 4046 } 4047 4048 /* Map declared to declared */ 4049 for (i = 0; i < shader->output_signature.element_count; ++i) 4050 { 4051 const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i]; 4052 4053 /* Write unread output to TA to throw them away */ 4054 priv_ctx->vs_output[output->register_idx] = "TA"; 4055 4056 if (!output->semantic_name) 4057 continue; 4058 4059 if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION) && !output->semantic_idx) 4060 { 4061 priv_ctx->vs_output[output->register_idx] = "TMP_OUT"; 4062 continue; 4063 } 4064 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE) && !output->semantic_idx) 4065 { 4066 priv_ctx->vs_output[output->register_idx] = "result.pointsize"; 4067 continue; 4068 } 4069 4070 for (j = 0; j < 
ps_input_sig->element_count; ++j) 4071 { 4072 const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[j]; 4073 4074 if (!input->semantic_name) 4075 continue; 4076 4077 if (!strcmp(input->semantic_name, output->semantic_name) 4078 && input->semantic_idx == output->semantic_idx) 4079 { 4080 priv_ctx->vs_output[output->register_idx] = decl_idx_to_string[input->register_idx]; 4081 4082 if (!strcmp(priv_ctx->vs_output[output->register_idx], "result.color.primary") 4083 || !strcmp(priv_ctx->vs_output[output->register_idx], "result.color.secondary")) 4084 { 4085 compiled->need_color_unclamp = TRUE; 4086 } 4087 } 4088 } 4089 } 4090 } 4091 4092 /* Context activation is done by the caller. */ 4093 static GLuint shader_arb_generate_vshader(const struct wined3d_shader *shader, 4094 const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer, 4095 const struct arb_vs_compile_args *args, struct arb_vs_compiled_shader *compiled, 4096 const struct wined3d_shader_signature *ps_input_sig) 4097 { 4098 const struct arb_vshader_private *shader_data = shader->backend_data; 4099 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 4100 struct shader_arb_priv *priv = shader->device->shader_priv; 4101 GLuint ret; 4102 DWORD next_local = 0; 4103 struct shader_arb_ctx_priv priv_ctx; 4104 unsigned int i; 4105 4106 memset(&priv_ctx, 0, sizeof(priv_ctx)); 4107 priv_ctx.cur_vs_args = args; 4108 list_init(&priv_ctx.control_frames); 4109 init_output_registers(shader, ps_input_sig, &priv_ctx, compiled); 4110 4111 /* Create the hw ARB shader */ 4112 shader_addline(buffer, "!!ARBvp1.0\n"); 4113 4114 /* Always enable the NV extension if available. Unlike fragment shaders, there is no 4115 * mesurable performance penalty, and we can always make use of it for clipplanes. 
4116 */ 4117 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 4118 { 4119 shader_addline(buffer, "OPTION NV_vertex_program3;\n"); 4120 priv_ctx.target_version = NV3; 4121 shader_addline(buffer, "ADDRESS aL;\n"); 4122 } 4123 else if (gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 4124 { 4125 shader_addline(buffer, "OPTION NV_vertex_program2;\n"); 4126 priv_ctx.target_version = NV2; 4127 shader_addline(buffer, "ADDRESS aL;\n"); 4128 } else { 4129 priv_ctx.target_version = ARB; 4130 } 4131 4132 shader_addline(buffer, "TEMP TMP_OUT;\n"); 4133 if (reg_maps->fog) 4134 shader_addline(buffer, "TEMP TMP_FOGCOORD;\n"); 4135 if (need_helper_const(shader_data, reg_maps, gl_info)) 4136 { 4137 char ftoa_tmp[17]; 4138 wined3d_ftoa(eps, ftoa_tmp); 4139 shader_addline(buffer, "PARAM helper_const = { 0.0, 1.0, 2.0, %s};\n", ftoa_tmp); 4140 } 4141 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) 4142 { 4143 shader_addline(buffer, "PARAM rel_addr_const = { 0.5, %d.0, 0.0, 0.0 };\n", shader_data->rel_offset); 4144 shader_addline(buffer, "TEMP A0_SHADOW;\n"); 4145 } 4146 4147 shader_addline(buffer, "TEMP TA;\n"); 4148 shader_addline(buffer, "TEMP TB;\n"); 4149 4150 /* Base Declarations */ 4151 shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info, 4152 &priv_ctx.vs_clipplanes, &priv_ctx); 4153 4154 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 4155 { 4156 compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED; 4157 if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2) 4158 { 4159 const DWORD *control_values = find_loop_control_values(shader, i); 4160 4161 if(control_values) 4162 { 4163 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i, 4164 control_values[0], control_values[1], control_values[2]); 4165 } 4166 else 4167 { 4168 compiled->int_consts[i] = next_local; 4169 compiled->num_int_consts++; 4170 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++); 4171 } 4172 } 4173 } 4174 4175 /* We need a constant to fixup the 
final position */ 4176 shader_addline(buffer, "PARAM posFixup = program.local[%u];\n", next_local); 4177 compiled->pos_fixup = next_local++; 4178 4179 /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values 4180 * for output parameters. D3D in theory does not do that either, but some applications depend on a 4181 * proper initialization of the secondary color, and programs using the fixed function pipeline without 4182 * a replacement shader depend on the texcoord.w being set properly. 4183 * 4184 * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This 4185 * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So 4186 * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex- 4187 * coords, we have a flag in the opengl caps. Many cards do not require the texcoord being set, and 4188 * this can eat a number of instructions, so skip it unless this cap is set as well 4189 */ 4190 if (!gl_info->supported[NV_VERTEX_PROGRAM]) 4191 { 4192 const char *color_init = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_0001); 4193 shader_addline(buffer, "MOV result.color.secondary, %s;\n", color_init); 4194 4195 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W && !priv->ffp_proj_control) 4196 { 4197 int i; 4198 const char *one = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ONE); 4199 for(i = 0; i < MAX_REG_TEXCRD; i++) 4200 { 4201 if (reg_maps->u.texcoord_mask[i] && reg_maps->u.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) 4202 shader_addline(buffer, "MOV result.texcoord[%u].w, %s\n", i, one); 4203 } 4204 } 4205 } 4206 4207 /* The shader starts with the main function */ 4208 priv_ctx.in_main_func = TRUE; 4209 /* Base Shader Body */ 4210 if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL))) 4211 return -1; 4212 4213 if (!priv_ctx.footer_written) 
vshader_add_footer(&priv_ctx, 4214 shader_data, args, reg_maps, gl_info, buffer); 4215 4216 shader_addline(buffer, "END\n"); 4217 4218 /* TODO: change to resource.glObjectHandle or something like that */ 4219 GL_EXTCALL(glGenProgramsARB(1, &ret)); 4220 4221 TRACE("Creating a hw vertex shader, prg=%d\n", ret); 4222 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret)); 4223 4224 TRACE("Created hw vertex shader, prg=%d\n", ret); 4225 if (!shader_arb_compile(gl_info, GL_VERTEX_PROGRAM_ARB, buffer->buffer)) 4226 return -1; 4227 4228 return ret; 4229 } 4230 4231 /* Context activation is done by the caller. */ 4232 static struct arb_ps_compiled_shader *find_arb_pshader(struct wined3d_shader *shader, 4233 const struct arb_ps_compile_args *args) 4234 { 4235 struct wined3d_device *device = shader->device; 4236 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 4237 const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info; 4238 UINT i; 4239 DWORD new_size; 4240 struct arb_ps_compiled_shader *new_array; 4241 struct wined3d_string_buffer buffer; 4242 struct arb_pshader_private *shader_data; 4243 GLuint ret; 4244 4245 if (!shader->backend_data) 4246 { 4247 struct shader_arb_priv *priv = device->shader_priv; 4248 4249 shader->backend_data = heap_alloc_zero(sizeof(*shader_data)); 4250 shader_data = shader->backend_data; 4251 shader_data->clamp_consts = shader->reg_maps.shader_version.major == 1; 4252 4253 if (shader->reg_maps.shader_version.major < 3) 4254 shader_data->input_signature_idx = ~0U; 4255 else 4256 shader_data->input_signature_idx = find_input_signature(priv, &shader->input_signature); 4257 4258 TRACE("Shader got assigned input signature index %u\n", shader_data->input_signature_idx); 4259 4260 if (!d3d_info->vs_clipping) 4261 shader_data->clipplane_emulation = shader_find_free_input_register(&shader->reg_maps, 4262 d3d_info->limits.ffp_blend_stages - 1); 4263 else 4264 shader_data->clipplane_emulation = ~0U; 4265 } 4266 shader_data = 
shader->backend_data; 4267 4268 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 4269 * so a linear search is more performant than a hashmap or a binary search 4270 * (cache coherency etc) 4271 */ 4272 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4273 { 4274 if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args))) 4275 return &shader_data->gl_shaders[i]; 4276 } 4277 4278 TRACE("No matching GL shader found, compiling a new shader\n"); 4279 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 4280 if (shader_data->num_gl_shaders) 4281 { 4282 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 4283 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 4284 new_size * sizeof(*shader_data->gl_shaders)); 4285 } 4286 else 4287 { 4288 new_array = heap_alloc_zero(sizeof(*shader_data->gl_shaders)); 4289 new_size = 1; 4290 } 4291 4292 if(!new_array) { 4293 ERR("Out of memory\n"); 4294 return 0; 4295 } 4296 shader_data->gl_shaders = new_array; 4297 shader_data->shader_array_size = new_size; 4298 } 4299 4300 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 4301 4302 pixelshader_update_resource_types(shader, args->super.tex_types); 4303 4304 if (!string_buffer_init(&buffer)) 4305 { 4306 ERR("Failed to initialize shader buffer.\n"); 4307 return 0; 4308 } 4309 4310 ret = shader_arb_generate_pshader(shader, gl_info, &buffer, args, 4311 &shader_data->gl_shaders[shader_data->num_gl_shaders]); 4312 string_buffer_free(&buffer); 4313 shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 4314 4315 return &shader_data->gl_shaders[shader_data->num_gl_shaders++]; 4316 } 4317 4318 static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new, 4319 const DWORD use_map, BOOL skip_int) { 4320 if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE; 4321 
if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE; 4322 if(stored->super.fog_src != new->super.fog_src) return FALSE; 4323 if(stored->clip.boolclip_compare != new->clip.boolclip_compare) return FALSE; 4324 if(stored->ps_signature != new->ps_signature) return FALSE; 4325 if(stored->vertex.samplers_compare != new->vertex.samplers_compare) return FALSE; 4326 if(skip_int) return TRUE; 4327 4328 return !memcmp(stored->loop_ctrl, new->loop_ctrl, sizeof(stored->loop_ctrl)); 4329 } 4330 4331 static struct arb_vs_compiled_shader *find_arb_vshader(struct wined3d_shader *shader, 4332 const struct wined3d_gl_info *gl_info, DWORD use_map, const struct arb_vs_compile_args *args, 4333 const struct wined3d_shader_signature *ps_input_sig) 4334 { 4335 UINT i; 4336 DWORD new_size; 4337 struct arb_vs_compiled_shader *new_array; 4338 struct wined3d_string_buffer buffer; 4339 struct arb_vshader_private *shader_data; 4340 GLuint ret; 4341 4342 if (!shader->backend_data) 4343 { 4344 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 4345 4346 shader->backend_data = heap_alloc_zero(sizeof(*shader_data)); 4347 shader_data = shader->backend_data; 4348 4349 if ((gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) 4350 && reg_maps->min_rel_offset <= reg_maps->max_rel_offset) 4351 { 4352 if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 127) 4353 { 4354 FIXME("The difference between the minimum and maximum relative offset is > 127.\n"); 4355 FIXME("Which this OpenGL implementation does not support. 
Try using GLSL.\n"); 4356 FIXME("Min: %u, Max: %u.\n", reg_maps->min_rel_offset, reg_maps->max_rel_offset); 4357 } 4358 else if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 63) 4359 shader_data->rel_offset = reg_maps->min_rel_offset + 63; 4360 else if (reg_maps->max_rel_offset > 63) 4361 shader_data->rel_offset = reg_maps->min_rel_offset; 4362 } 4363 } 4364 shader_data = shader->backend_data; 4365 4366 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 4367 * so a linear search is more performant than a hashmap or a binary search 4368 * (cache coherency etc) 4369 */ 4370 for(i = 0; i < shader_data->num_gl_shaders; i++) { 4371 if (vs_args_equal(&shader_data->gl_shaders[i].args, args, 4372 use_map, gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])) 4373 { 4374 return &shader_data->gl_shaders[i]; 4375 } 4376 } 4377 4378 TRACE("No matching GL shader found, compiling a new shader\n"); 4379 4380 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 4381 if (shader_data->num_gl_shaders) 4382 { 4383 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 4384 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 4385 new_size * sizeof(*shader_data->gl_shaders)); 4386 } 4387 else 4388 { 4389 new_array = heap_alloc_zero(sizeof(*shader_data->gl_shaders)); 4390 new_size = 1; 4391 } 4392 4393 if(!new_array) { 4394 ERR("Out of memory\n"); 4395 return 0; 4396 } 4397 shader_data->gl_shaders = new_array; 4398 shader_data->shader_array_size = new_size; 4399 } 4400 4401 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 4402 4403 if (!string_buffer_init(&buffer)) 4404 { 4405 ERR("Failed to initialize shader buffer.\n"); 4406 return 0; 4407 } 4408 4409 ret = shader_arb_generate_vshader(shader, gl_info, &buffer, args, 4410 &shader_data->gl_shaders[shader_data->num_gl_shaders], 4411 ps_input_sig); 4412 string_buffer_free(&buffer); 4413 
shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 4414 4415 return &shader_data->gl_shaders[shader_data->num_gl_shaders++]; 4416 } 4417 4418 static void find_arb_ps_compile_args(const struct wined3d_state *state, 4419 const struct wined3d_context *context, const struct wined3d_shader *shader, 4420 struct arb_ps_compile_args *args) 4421 { 4422 const struct wined3d_gl_info *gl_info = context->gl_info; 4423 const struct wined3d_d3d_info *d3d_info = context->d3d_info; 4424 int i; 4425 WORD int_skip; 4426 4427 find_ps_compile_args(state, shader, context->stream_info.position_transformed, &args->super, context); 4428 4429 /* This forces all local boolean constants to 1 to make them stateblock independent */ 4430 args->bools = shader->reg_maps.local_bool_consts; 4431 4432 for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) 4433 { 4434 if (state->ps_consts_b[i]) 4435 args->bools |= ( 1u << i); 4436 } 4437 4438 /* Only enable the clip plane emulation KIL if at least one clipplane is enabled. The KIL instruction 4439 * is quite expensive because it forces the driver to disable early Z discards. 
It is cheaper to 4440 * duplicate the shader than have a no-op KIL instruction in every shader 4441 */ 4442 if (!d3d_info->vs_clipping && use_vs(state) 4443 && state->render_states[WINED3D_RS_CLIPPING] 4444 && state->render_states[WINED3D_RS_CLIPPLANEENABLE]) 4445 args->clip = 1; 4446 else 4447 args->clip = 0; 4448 4449 /* Skip if unused or local, or supported natively */ 4450 int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts; 4451 if (int_skip == 0xffff || gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]) 4452 { 4453 memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 4454 return; 4455 } 4456 4457 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 4458 { 4459 if (int_skip & (1u << i)) 4460 { 4461 args->loop_ctrl[i][0] = 0; 4462 args->loop_ctrl[i][1] = 0; 4463 args->loop_ctrl[i][2] = 0; 4464 } 4465 else 4466 { 4467 args->loop_ctrl[i][0] = state->ps_consts_i[i].x; 4468 args->loop_ctrl[i][1] = state->ps_consts_i[i].y; 4469 args->loop_ctrl[i][2] = state->ps_consts_i[i].z; 4470 } 4471 } 4472 } 4473 4474 static void find_arb_vs_compile_args(const struct wined3d_state *state, 4475 const struct wined3d_context *context, const struct wined3d_shader *shader, 4476 struct arb_vs_compile_args *args) 4477 { 4478 const struct wined3d_device *device = shader->device; 4479 const struct wined3d_adapter *adapter = device->adapter; 4480 const struct wined3d_gl_info *gl_info = context->gl_info; 4481 const struct wined3d_d3d_info *d3d_info = context->d3d_info; 4482 int i; 4483 WORD int_skip; 4484 4485 find_vs_compile_args(state, shader, context->stream_info.swizzle_map, &args->super, context); 4486 4487 args->clip.boolclip_compare = 0; 4488 if (use_ps(state)) 4489 { 4490 const struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 4491 const struct arb_pshader_private *shader_priv = ps->backend_data; 4492 args->ps_signature = shader_priv->input_signature_idx; 4493 4494 args->clip.boolclip.clip_texcoord = shader_priv->clipplane_emulation + 1; 4495 
} 4496 else 4497 { 4498 args->ps_signature = ~0; 4499 if (!d3d_info->vs_clipping && adapter->fragment_pipe == &arbfp_fragment_pipeline) 4500 args->clip.boolclip.clip_texcoord = ffp_clip_emul(context) ? d3d_info->limits.ffp_blend_stages : 0; 4501 /* Otherwise: Setting boolclip_compare set clip_texcoord to 0 */ 4502 } 4503 4504 if (args->clip.boolclip.clip_texcoord) 4505 { 4506 if (state->render_states[WINED3D_RS_CLIPPING]) 4507 args->clip.boolclip.clipplane_mask = (unsigned char)state->render_states[WINED3D_RS_CLIPPLANEENABLE]; 4508 /* clipplane_mask was set to 0 by setting boolclip_compare to 0 */ 4509 } 4510 4511 /* This forces all local boolean constants to 1 to make them stateblock independent */ 4512 args->clip.boolclip.bools = shader->reg_maps.local_bool_consts; 4513 /* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */ 4514 for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) 4515 { 4516 if (state->vs_consts_b[i]) 4517 args->clip.boolclip.bools |= (1u << i); 4518 } 4519 4520 args->vertex.samplers[0] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 0]; 4521 args->vertex.samplers[1] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 1]; 4522 args->vertex.samplers[2] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 2]; 4523 args->vertex.samplers[3] = 0; 4524 4525 /* Skip if unused or local */ 4526 int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts; 4527 /* This is about flow control, not clipping. 
*/ 4528 if (int_skip == 0xffff || gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 4529 { 4530 memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 4531 return; 4532 } 4533 4534 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 4535 { 4536 if (int_skip & (1u << i)) 4537 { 4538 args->loop_ctrl[i][0] = 0; 4539 args->loop_ctrl[i][1] = 0; 4540 args->loop_ctrl[i][2] = 0; 4541 } 4542 else 4543 { 4544 args->loop_ctrl[i][0] = state->vs_consts_i[i].x; 4545 args->loop_ctrl[i][1] = state->vs_consts_i[i].y; 4546 args->loop_ctrl[i][2] = state->vs_consts_i[i].z; 4547 } 4548 } 4549 } 4550 4551 /* Context activation is done by the caller. */ 4552 static void shader_arb_select(void *shader_priv, struct wined3d_context *context, 4553 const struct wined3d_state *state) 4554 { 4555 struct shader_arb_priv *priv = shader_priv; 4556 const struct wined3d_gl_info *gl_info = context->gl_info; 4557 int i; 4558 4559 /* Deal with pixel shaders first so the vertex shader arg function has the input signature ready */ 4560 if (use_ps(state)) 4561 { 4562 struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 4563 struct arb_ps_compile_args compile_args; 4564 struct arb_ps_compiled_shader *compiled; 4565 4566 TRACE("Using pixel shader %p.\n", ps); 4567 find_arb_ps_compile_args(state, context, ps, &compile_args); 4568 compiled = find_arb_pshader(ps, &compile_args); 4569 priv->current_fprogram_id = compiled->prgId; 4570 priv->compiled_fprog = compiled; 4571 4572 /* Bind the fragment program */ 4573 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id)); 4574 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);"); 4575 4576 if (!priv->use_arbfp_fixed_func) 4577 priv->fragment_pipe->enable_extension(gl_info, FALSE); 4578 4579 /* Enable OpenGL fragment programs. 
*/ 4580 gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB); 4581 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);"); 4582 4583 TRACE("Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", priv->current_fprogram_id); 4584 4585 /* Pixel Shader 1.x constants are clamped to [-1;1], Pixel Shader 2.0 constants are not. If switching between 4586 * a 1.x and newer shader, reload the first 8 constants 4587 */ 4588 if (priv->last_ps_const_clamped != ((struct arb_pshader_private *)ps->backend_data)->clamp_consts) 4589 { 4590 priv->last_ps_const_clamped = ((struct arb_pshader_private *)ps->backend_data)->clamp_consts; 4591 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, 8); 4592 for(i = 0; i < 8; i++) 4593 { 4594 priv->pshader_const_dirty[i] = 1; 4595 } 4596 /* Also takes care of loading local constants */ 4597 shader_arb_load_constants_internal(shader_priv, context, state, TRUE, FALSE, TRUE); 4598 } 4599 else 4600 { 4601 UINT rt_height = state->fb->render_targets[0]->height; 4602 shader_arb_ps_local_constants(compiled, context, state, rt_height); 4603 } 4604 4605 /* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */ 4606 if (compiled->np2fixup_info.super.active) 4607 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; 4608 4609 if (ps->load_local_constsF) 4610 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F; 4611 } 4612 else 4613 { 4614 if (gl_info->supported[ARB_FRAGMENT_PROGRAM] && !priv->use_arbfp_fixed_func) 4615 { 4616 /* Disable only if we're not using arbfp fixed function fragment 4617 * processing. If this is used, keep GL_FRAGMENT_PROGRAM_ARB 4618 * enabled, and the fixed function pipeline will bind the fixed 4619 * function replacement shader. 
*/ 4620 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 4621 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 4622 priv->current_fprogram_id = 0; 4623 } 4624 priv->fragment_pipe->enable_extension(gl_info, TRUE); 4625 } 4626 4627 if (use_vs(state)) 4628 { 4629 struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; 4630 struct arb_vs_compile_args compile_args; 4631 struct arb_vs_compiled_shader *compiled; 4632 const struct wined3d_shader_signature *ps_input_sig; 4633 4634 TRACE("Using vertex shader %p\n", vs); 4635 find_arb_vs_compile_args(state, context, vs, &compile_args); 4636 4637 /* Instead of searching for the signature in the signature list, read the one from the 4638 * current pixel shader. It's maybe not the shader where the signature came from, but it 4639 * is the same signature and faster to find. */ 4640 if (compile_args.ps_signature == ~0U) 4641 ps_input_sig = NULL; 4642 else 4643 ps_input_sig = &state->shader[WINED3D_SHADER_TYPE_PIXEL]->input_signature; 4644 4645 compiled = find_arb_vshader(vs, context->gl_info, context->stream_info.use_map, 4646 &compile_args, ps_input_sig); 4647 priv->current_vprogram_id = compiled->prgId; 4648 priv->compiled_vprog = compiled; 4649 4650 /* Bind the vertex program */ 4651 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id)); 4652 checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);"); 4653 4654 priv->vertex_pipe->vp_enable(gl_info, FALSE); 4655 4656 /* Enable OpenGL vertex programs */ 4657 gl_info->gl_ops.gl.p_glEnable(GL_VERTEX_PROGRAM_ARB); 4658 checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);"); 4659 TRACE("Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", priv->current_vprogram_id); 4660 shader_arb_vs_local_constants(compiled, context, state); 4661 4662 if(priv->last_vs_color_unclamp != compiled->need_color_unclamp) { 4663 priv->last_vs_color_unclamp = compiled->need_color_unclamp; 4664 4665 if 
(gl_info->supported[ARB_COLOR_BUFFER_FLOAT]) 4666 { 4667 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, !compiled->need_color_unclamp)); 4668 checkGLcall("glClampColorARB"); 4669 } else { 4670 FIXME("vertex color clamp needs to be changed, but extension not supported.\n"); 4671 } 4672 } 4673 4674 if (vs->load_local_constsF) 4675 context->constant_update_mask |= WINED3D_SHADER_CONST_VS_F; 4676 } 4677 else 4678 { 4679 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4680 { 4681 priv->current_vprogram_id = 0; 4682 gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB); 4683 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 4684 } 4685 priv->vertex_pipe->vp_enable(gl_info, TRUE); 4686 } 4687 } 4688 4689 static void shader_arb_select_compute(void *shader_priv, struct wined3d_context *context, 4690 const struct wined3d_state *state) 4691 { 4692 ERR("Compute pipeline not supported by the ARB shader backend.\n"); 4693 } 4694 4695 /* Context activation is done by the caller. */ 4696 static void shader_arb_disable(void *shader_priv, struct wined3d_context *context) 4697 { 4698 const struct wined3d_gl_info *gl_info = context->gl_info; 4699 struct shader_arb_priv *priv = shader_priv; 4700 4701 if (gl_info->supported[ARB_FRAGMENT_PROGRAM]) 4702 { 4703 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 4704 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 4705 priv->current_fprogram_id = 0; 4706 } 4707 priv->fragment_pipe->enable_extension(gl_info, FALSE); 4708 4709 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4710 { 4711 priv->current_vprogram_id = 0; 4712 gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB); 4713 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 4714 } 4715 priv->vertex_pipe->vp_enable(gl_info, FALSE); 4716 4717 if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT] && priv->last_vs_color_unclamp) 4718 { 4719 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, GL_FIXED_ONLY_ARB)); 4720 checkGLcall("glClampColorARB"); 4721 priv->last_vs_color_unclamp = FALSE; 
4722 } 4723 4724 context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL) 4725 | (1u << WINED3D_SHADER_TYPE_VERTEX) 4726 | (1u << WINED3D_SHADER_TYPE_GEOMETRY) 4727 | (1u << WINED3D_SHADER_TYPE_HULL) 4728 | (1u << WINED3D_SHADER_TYPE_DOMAIN) 4729 | (1u << WINED3D_SHADER_TYPE_COMPUTE); 4730 } 4731 4732 static void shader_arb_destroy(struct wined3d_shader *shader) 4733 { 4734 struct wined3d_device *device = shader->device; 4735 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 4736 4737 if (shader_is_pshader_version(shader->reg_maps.shader_version.type)) 4738 { 4739 struct arb_pshader_private *shader_data = shader->backend_data; 4740 UINT i; 4741 4742 if(!shader_data) return; /* This can happen if a shader was never compiled */ 4743 4744 if (shader_data->num_gl_shaders) 4745 { 4746 struct wined3d_context *context = context_acquire(device, NULL, 0); 4747 4748 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4749 { 4750 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 4751 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 4752 } 4753 4754 context_release(context); 4755 } 4756 4757 heap_free(shader_data->gl_shaders); 4758 heap_free(shader_data); 4759 shader->backend_data = NULL; 4760 } 4761 else 4762 { 4763 struct arb_vshader_private *shader_data = shader->backend_data; 4764 UINT i; 4765 4766 if(!shader_data) return; /* This can happen if a shader was never compiled */ 4767 4768 if (shader_data->num_gl_shaders) 4769 { 4770 struct wined3d_context *context = context_acquire(device, NULL, 0); 4771 4772 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4773 { 4774 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 4775 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 4776 } 4777 4778 context_release(context); 4779 } 4780 4781 heap_free(shader_data->gl_shaders); 4782 heap_free(shader_data); 4783 shader->backend_data = NULL; 4784 } 4785 } 
4786 4787 static int sig_tree_compare(const void *key, const struct wine_rb_entry *entry) 4788 { 4789 struct ps_signature *e = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 4790 return compare_sig(key, &e->sig); 4791 } 4792 4793 static HRESULT shader_arb_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe, 4794 const struct fragment_pipeline *fragment_pipe) 4795 { 4796 const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info; 4797 struct fragment_caps fragment_caps; 4798 void *vertex_priv, *fragment_priv; 4799 struct shader_arb_priv *priv; 4800 4801 if (!(priv = heap_alloc_zero(sizeof(*priv)))) 4802 return E_OUTOFMEMORY; 4803 4804 if (!(vertex_priv = vertex_pipe->vp_alloc(&arb_program_shader_backend, priv))) 4805 { 4806 ERR("Failed to initialize vertex pipe.\n"); 4807 heap_free(priv); 4808 return E_FAIL; 4809 } 4810 4811 if (!(fragment_priv = fragment_pipe->alloc_private(&arb_program_shader_backend, priv))) 4812 { 4813 ERR("Failed to initialize fragment pipe.\n"); 4814 vertex_pipe->vp_free(device); 4815 heap_free(priv); 4816 return E_FAIL; 4817 } 4818 4819 memset(priv->vshader_const_dirty, 1, 4820 sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count); 4821 memset(priv->pshader_const_dirty, 1, 4822 sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count); 4823 4824 wine_rb_init(&priv->signature_tree, sig_tree_compare); 4825 4826 priv->vertex_pipe = vertex_pipe; 4827 priv->fragment_pipe = fragment_pipe; 4828 fragment_pipe->get_caps(&device->adapter->gl_info, &fragment_caps); 4829 priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL; 4830 4831 device->vertex_priv = vertex_priv; 4832 device->fragment_priv = fragment_priv; 4833 device->shader_priv = priv; 4834 4835 return WINED3D_OK; 4836 } 4837 4838 static void release_signature(struct wine_rb_entry *entry, void *context) 4839 { 4840 struct ps_signature *sig = WINE_RB_ENTRY_VALUE(entry, struct 
ps_signature, entry); 4841 unsigned int i; 4842 4843 for (i = 0; i < sig->sig.element_count; ++i) 4844 { 4845 heap_free((char *)sig->sig.elements[i].semantic_name); 4846 } 4847 heap_free(sig->sig.elements); 4848 heap_free(sig); 4849 } 4850 4851 /* Context activation is done by the caller. */ 4852 static void shader_arb_free(struct wined3d_device *device) 4853 { 4854 struct shader_arb_priv *priv = device->shader_priv; 4855 4856 wine_rb_destroy(&priv->signature_tree, release_signature, NULL); 4857 priv->fragment_pipe->free_private(device); 4858 priv->vertex_pipe->vp_free(device); 4859 heap_free(device->shader_priv); 4860 } 4861 4862 static BOOL shader_arb_allocate_context_data(struct wined3d_context *context) 4863 { 4864 return TRUE; 4865 } 4866 4867 static void shader_arb_free_context_data(struct wined3d_context *context) 4868 { 4869 struct shader_arb_priv *priv; 4870 4871 priv = context->device->shader_priv; 4872 if (priv->last_context == context) 4873 priv->last_context = NULL; 4874 } 4875 4876 static void shader_arb_init_context_state(struct wined3d_context *context) {} 4877 4878 static void shader_arb_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps) 4879 { 4880 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4881 { 4882 DWORD vs_consts; 4883 UINT vs_version; 4884 4885 /* 96 is the minimum allowed value of MAX_PROGRAM_ENV_PARAMETERS_ARB 4886 * for vertex programs. If the native limit is less than that it's 4887 * not very useful, and e.g. Mesa swrast returns 0, probably to 4888 * indicate it's a software implementation. 
*/ 4889 if (gl_info->limits.arb_vs_native_constants < 96) 4890 vs_consts = gl_info->limits.arb_vs_float_constants; 4891 else 4892 vs_consts = min(gl_info->limits.arb_vs_float_constants, gl_info->limits.arb_vs_native_constants); 4893 4894 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 4895 { 4896 vs_version = 3; 4897 TRACE("Hardware vertex shader version 3.0 enabled (NV_VERTEX_PROGRAM3)\n"); 4898 } 4899 else if (vs_consts >= 256) 4900 { 4901 /* Shader Model 2.0 requires at least 256 vertex shader constants */ 4902 vs_version = 2; 4903 TRACE("Hardware vertex shader version 2.0 enabled (ARB_PROGRAM)\n"); 4904 } 4905 else 4906 { 4907 vs_version = 1; 4908 TRACE("Hardware vertex shader version 1.1 enabled (ARB_PROGRAM)\n"); 4909 } 4910 caps->vs_version = min(wined3d_settings.max_sm_vs, vs_version); 4911 caps->vs_uniform_count = min(WINED3D_MAX_VS_CONSTS_F, vs_consts); 4912 } 4913 else 4914 { 4915 caps->vs_version = 0; 4916 caps->vs_uniform_count = 0; 4917 } 4918 4919 caps->hs_version = 0; 4920 caps->ds_version = 0; 4921 caps->gs_version = 0; 4922 caps->cs_version = 0; 4923 4924 if (gl_info->supported[ARB_FRAGMENT_PROGRAM]) 4925 { 4926 DWORD ps_consts; 4927 UINT ps_version; 4928 4929 /* Similar as above for vertex programs, but the minimum for fragment 4930 * programs is 24. 
*/ 4931 if (gl_info->limits.arb_ps_native_constants < 24) 4932 ps_consts = gl_info->limits.arb_ps_float_constants; 4933 else 4934 ps_consts = min(gl_info->limits.arb_ps_float_constants, gl_info->limits.arb_ps_native_constants); 4935 4936 if (gl_info->supported[NV_FRAGMENT_PROGRAM2]) 4937 { 4938 ps_version = 3; 4939 TRACE("Hardware pixel shader version 3.0 enabled (NV_FRAGMENT_PROGRAM2)\n"); 4940 } 4941 else if (ps_consts >= 32) 4942 { 4943 /* Shader Model 2.0 requires at least 32 pixel shader constants */ 4944 ps_version = 2; 4945 TRACE("Hardware pixel shader version 2.0 enabled (ARB_PROGRAM)\n"); 4946 } 4947 else 4948 { 4949 ps_version = 1; 4950 TRACE("Hardware pixel shader version 1.4 enabled (ARB_PROGRAM)\n"); 4951 } 4952 caps->ps_version = min(wined3d_settings.max_sm_ps, ps_version); 4953 caps->ps_uniform_count = min(WINED3D_MAX_PS_CONSTS_F, ps_consts); 4954 caps->ps_1x_max_value = 8.0f; 4955 } 4956 else 4957 { 4958 caps->ps_version = 0; 4959 caps->ps_uniform_count = 0; 4960 caps->ps_1x_max_value = 0.0f; 4961 } 4962 4963 caps->varying_count = 0; 4964 caps->wined3d_caps = WINED3D_SHADER_CAP_SRGB_WRITE; 4965 if (use_nv_clip(gl_info)) 4966 caps->wined3d_caps |= WINED3D_SHADER_CAP_VS_CLIPPING; 4967 } 4968 4969 static BOOL shader_arb_color_fixup_supported(struct color_fixup_desc fixup) 4970 { 4971 /* We support everything except complex conversions. 
*/ 4972 return !is_complex_fixup(fixup); 4973 } 4974 4975 static void shader_arb_add_instruction_modifiers(const struct wined3d_shader_instruction *ins) { 4976 DWORD shift; 4977 char write_mask[20], regstr[50]; 4978 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 4979 BOOL is_color = FALSE; 4980 const struct wined3d_shader_dst_param *dst; 4981 4982 if (!ins->dst_count) return; 4983 4984 dst = &ins->dst[0]; 4985 shift = dst->shift; 4986 if (!shift) return; /* Saturate alone is handled by the instructions */ 4987 4988 shader_arb_get_write_mask(ins, dst, write_mask); 4989 shader_arb_get_register_name(ins, &dst->reg, regstr, &is_color); 4990 4991 /* Generate a line that does the output modifier computation 4992 * FIXME: _SAT vs shift? _SAT alone is already handled in the instructions, if this 4993 * maps problems in e.g. _d4_sat modify shader_arb_get_modifier 4994 */ 4995 shader_addline(buffer, "MUL%s %s%s, %s, %s;\n", shader_arb_get_modifier(ins), 4996 regstr, write_mask, regstr, shift_tab[shift]); 4997 } 4998 4999 static const SHADER_HANDLER shader_arb_instruction_handler_table[WINED3DSIH_TABLE_SIZE] = 5000 { 5001 /* WINED3DSIH_ABS */ shader_hw_map2gl, 5002 /* WINED3DSIH_ADD */ shader_hw_map2gl, 5003 /* WINED3DSIH_AND */ NULL, 5004 /* WINED3DSIH_ATOMIC_AND */ NULL, 5005 /* WINED3DSIH_ATOMIC_CMP_STORE */ NULL, 5006 /* WINED3DSIH_ATOMIC_IADD */ NULL, 5007 /* WINED3DSIH_ATOMIC_IMAX */ NULL, 5008 /* WINED3DSIH_ATOMIC_IMIN */ NULL, 5009 /* WINED3DSIH_ATOMIC_OR */ NULL, 5010 /* WINED3DSIH_ATOMIC_UMAX */ NULL, 5011 /* WINED3DSIH_ATOMIC_UMIN */ NULL, 5012 /* WINED3DSIH_ATOMIC_XOR */ NULL, 5013 /* WINED3DSIH_BEM */ pshader_hw_bem, 5014 /* WINED3DSIH_BFI */ NULL, 5015 /* WINED3DSIH_BFREV */ NULL, 5016 /* WINED3DSIH_BREAK */ shader_hw_break, 5017 /* WINED3DSIH_BREAKC */ shader_hw_breakc, 5018 /* WINED3DSIH_BREAKP */ NULL, 5019 /* WINED3DSIH_BUFINFO */ NULL, 5020 /* WINED3DSIH_CALL */ shader_hw_call, 5021 /* WINED3DSIH_CALLNZ */ NULL, 5022 /* WINED3DSIH_CASE */ NULL, 
5023 /* WINED3DSIH_CMP */ pshader_hw_cmp, 5024 /* WINED3DSIH_CND */ pshader_hw_cnd, 5025 /* WINED3DSIH_CONTINUE */ NULL, 5026 /* WINED3DSIH_CONTINUEP */ NULL, 5027 /* WINED3DSIH_COUNTBITS */ NULL, 5028 /* WINED3DSIH_CRS */ shader_hw_map2gl, 5029 /* WINED3DSIH_CUT */ NULL, 5030 /* WINED3DSIH_CUT_STREAM */ NULL, 5031 /* WINED3DSIH_DCL */ shader_hw_nop, 5032 /* WINED3DSIH_DCL_CONSTANT_BUFFER */ shader_hw_nop, 5033 /* WINED3DSIH_DCL_FUNCTION_BODY */ NULL, 5034 /* WINED3DSIH_DCL_FUNCTION_TABLE */ NULL, 5035 /* WINED3DSIH_DCL_GLOBAL_FLAGS */ NULL, 5036 /* WINED3DSIH_DCL_GS_INSTANCES */ NULL, 5037 /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */ NULL, 5038 /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */ NULL, 5039 /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR */ NULL, 5040 /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER */ NULL, 5041 /* WINED3DSIH_DCL_INDEX_RANGE */ NULL, 5042 /* WINED3DSIH_DCL_INDEXABLE_TEMP */ NULL, 5043 /* WINED3DSIH_DCL_INPUT */ NULL, 5044 /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT */ NULL, 5045 /* WINED3DSIH_DCL_INPUT_PRIMITIVE */ shader_hw_nop, 5046 /* WINED3DSIH_DCL_INPUT_PS */ NULL, 5047 /* WINED3DSIH_DCL_INPUT_PS_SGV */ NULL, 5048 /* WINED3DSIH_DCL_INPUT_PS_SIV */ NULL, 5049 /* WINED3DSIH_DCL_INPUT_SGV */ NULL, 5050 /* WINED3DSIH_DCL_INPUT_SIV */ NULL, 5051 /* WINED3DSIH_DCL_INTERFACE */ NULL, 5052 /* WINED3DSIH_DCL_OUTPUT */ NULL, 5053 /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT */ NULL, 5054 /* WINED3DSIH_DCL_OUTPUT_SIV */ NULL, 5055 /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY */ shader_hw_nop, 5056 /* WINED3DSIH_DCL_RESOURCE_RAW */ NULL, 5057 /* WINED3DSIH_DCL_RESOURCE_STRUCTURED */ NULL, 5058 /* WINED3DSIH_DCL_SAMPLER */ NULL, 5059 /* WINED3DSIH_DCL_STREAM */ NULL, 5060 /* WINED3DSIH_DCL_TEMPS */ NULL, 5061 /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN */ NULL, 5062 /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */ NULL, 5063 /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING */ NULL, 5064 /* WINED3DSIH_DCL_TGSM_RAW */ NULL, 5065 /* WINED3DSIH_DCL_TGSM_STRUCTURED */ NULL, 5066 
/* WINED3DSIH_DCL_THREAD_GROUP */ NULL, 5067 /* WINED3DSIH_DCL_UAV_RAW */ NULL, 5068 /* WINED3DSIH_DCL_UAV_STRUCTURED */ NULL, 5069 /* WINED3DSIH_DCL_UAV_TYPED */ NULL, 5070 /* WINED3DSIH_DCL_VERTICES_OUT */ shader_hw_nop, 5071 /* WINED3DSIH_DEF */ shader_hw_nop, 5072 /* WINED3DSIH_DEFAULT */ NULL, 5073 /* WINED3DSIH_DEFB */ shader_hw_nop, 5074 /* WINED3DSIH_DEFI */ shader_hw_nop, 5075 /* WINED3DSIH_DIV */ NULL, 5076 /* WINED3DSIH_DP2 */ NULL, 5077 /* WINED3DSIH_DP2ADD */ pshader_hw_dp2add, 5078 /* WINED3DSIH_DP3 */ shader_hw_map2gl, 5079 /* WINED3DSIH_DP4 */ shader_hw_map2gl, 5080 /* WINED3DSIH_DST */ shader_hw_map2gl, 5081 /* WINED3DSIH_DSX */ shader_hw_map2gl, 5082 /* WINED3DSIH_DSX_COARSE */ NULL, 5083 /* WINED3DSIH_DSX_FINE */ NULL, 5084 /* WINED3DSIH_DSY */ shader_hw_dsy, 5085 /* WINED3DSIH_DSY_COARSE */ NULL, 5086 /* WINED3DSIH_DSY_FINE */ NULL, 5087 /* WINED3DSIH_EVAL_SAMPLE_INDEX */ NULL, 5088 /* WINED3DSIH_ELSE */ shader_hw_else, 5089 /* WINED3DSIH_EMIT */ NULL, 5090 /* WINED3DSIH_EMIT_STREAM */ NULL, 5091 /* WINED3DSIH_ENDIF */ shader_hw_endif, 5092 /* WINED3DSIH_ENDLOOP */ shader_hw_endloop, 5093 /* WINED3DSIH_ENDREP */ shader_hw_endrep, 5094 /* WINED3DSIH_ENDSWITCH */ NULL, 5095 /* WINED3DSIH_EQ */ NULL, 5096 /* WINED3DSIH_EXP */ shader_hw_scalar_op, 5097 /* WINED3DSIH_EXPP */ shader_hw_scalar_op, 5098 /* WINED3DSIH_F16TOF32 */ NULL, 5099 /* WINED3DSIH_F32TOF16 */ NULL, 5100 /* WINED3DSIH_FCALL */ NULL, 5101 /* WINED3DSIH_FIRSTBIT_HI */ NULL, 5102 /* WINED3DSIH_FIRSTBIT_LO */ NULL, 5103 /* WINED3DSIH_FIRSTBIT_SHI */ NULL, 5104 /* WINED3DSIH_FRC */ shader_hw_map2gl, 5105 /* WINED3DSIH_FTOI */ NULL, 5106 /* WINED3DSIH_FTOU */ NULL, 5107 /* WINED3DSIH_GATHER4 */ NULL, 5108 /* WINED3DSIH_GATHER4_C */ NULL, 5109 /* WINED3DSIH_GATHER4_PO */ NULL, 5110 /* WINED3DSIH_GATHER4_PO_C */ NULL, 5111 /* WINED3DSIH_GE */ NULL, 5112 /* WINED3DSIH_HS_CONTROL_POINT_PHASE */ NULL, 5113 /* WINED3DSIH_HS_DECLS */ NULL, 5114 /* WINED3DSIH_HS_FORK_PHASE */ NULL, 5115 /* 
WINED3DSIH_HS_JOIN_PHASE */ NULL, 5116 /* WINED3DSIH_IADD */ NULL, 5117 /* WINED3DSIH_IBFE */ NULL, 5118 /* WINED3DSIH_IEQ */ NULL, 5119 /* WINED3DSIH_IF */ NULL /* Hardcoded into the shader */, 5120 /* WINED3DSIH_IFC */ shader_hw_ifc, 5121 /* WINED3DSIH_IGE */ NULL, 5122 /* WINED3DSIH_ILT */ NULL, 5123 /* WINED3DSIH_IMAD */ NULL, 5124 /* WINED3DSIH_IMAX */ NULL, 5125 /* WINED3DSIH_IMIN */ NULL, 5126 /* WINED3DSIH_IMM_ATOMIC_ALLOC */ NULL, 5127 /* WINED3DSIH_IMM_ATOMIC_AND */ NULL, 5128 /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH */ NULL, 5129 /* WINED3DSIH_IMM_ATOMIC_CONSUME */ NULL, 5130 /* WINED3DSIH_IMM_ATOMIC_EXCH */ NULL, 5131 /* WINED3DSIH_IMM_ATOMIC_IADD */ NULL, 5132 /* WINED3DSIH_IMM_ATOMIC_IMAX */ NULL, 5133 /* WINED3DSIH_IMM_ATOMIC_IMIN */ NULL, 5134 /* WINED3DSIH_IMM_ATOMIC_OR */ NULL, 5135 /* WINED3DSIH_IMM_ATOMIC_UMAX */ NULL, 5136 /* WINED3DSIH_IMM_ATOMIC_UMIN */ NULL, 5137 /* WINED3DSIH_IMM_ATOMIC_XOR */ NULL, 5138 /* WINED3DSIH_IMUL */ NULL, 5139 /* WINED3DSIH_INE */ NULL, 5140 /* WINED3DSIH_INEG */ NULL, 5141 /* WINED3DSIH_ISHL */ NULL, 5142 /* WINED3DSIH_ISHR */ NULL, 5143 /* WINED3DSIH_ITOF */ NULL, 5144 /* WINED3DSIH_LABEL */ shader_hw_label, 5145 /* WINED3DSIH_LD */ NULL, 5146 /* WINED3DSIH_LD2DMS */ NULL, 5147 /* WINED3DSIH_LD_RAW */ NULL, 5148 /* WINED3DSIH_LD_STRUCTURED */ NULL, 5149 /* WINED3DSIH_LD_UAV_TYPED */ NULL, 5150 /* WINED3DSIH_LIT */ shader_hw_map2gl, 5151 /* WINED3DSIH_LOD */ NULL, 5152 /* WINED3DSIH_LOG */ shader_hw_scalar_op, 5153 /* WINED3DSIH_LOGP */ shader_hw_scalar_op, 5154 /* WINED3DSIH_LOOP */ shader_hw_loop, 5155 /* WINED3DSIH_LRP */ shader_hw_lrp, 5156 /* WINED3DSIH_LT */ NULL, 5157 /* WINED3DSIH_M3x2 */ shader_hw_mnxn, 5158 /* WINED3DSIH_M3x3 */ shader_hw_mnxn, 5159 /* WINED3DSIH_M3x4 */ shader_hw_mnxn, 5160 /* WINED3DSIH_M4x3 */ shader_hw_mnxn, 5161 /* WINED3DSIH_M4x4 */ shader_hw_mnxn, 5162 /* WINED3DSIH_MAD */ shader_hw_map2gl, 5163 /* WINED3DSIH_MAX */ shader_hw_map2gl, 5164 /* WINED3DSIH_MIN */ shader_hw_map2gl, 5165 /* 
WINED3DSIH_MOV */ shader_hw_mov, 5166 /* WINED3DSIH_MOVA */ shader_hw_mov, 5167 /* WINED3DSIH_MOVC */ NULL, 5168 /* WINED3DSIH_MUL */ shader_hw_map2gl, 5169 /* WINED3DSIH_NE */ NULL, 5170 /* WINED3DSIH_NOP */ shader_hw_nop, 5171 /* WINED3DSIH_NOT */ NULL, 5172 /* WINED3DSIH_NRM */ shader_hw_nrm, 5173 /* WINED3DSIH_OR */ NULL, 5174 /* WINED3DSIH_PHASE */ shader_hw_nop, 5175 /* WINED3DSIH_POW */ shader_hw_pow, 5176 /* WINED3DSIH_RCP */ shader_hw_scalar_op, 5177 /* WINED3DSIH_REP */ shader_hw_rep, 5178 /* WINED3DSIH_RESINFO */ NULL, 5179 /* WINED3DSIH_RET */ shader_hw_ret, 5180 /* WINED3DSIH_RETP */ NULL, 5181 /* WINED3DSIH_ROUND_NE */ NULL, 5182 /* WINED3DSIH_ROUND_NI */ NULL, 5183 /* WINED3DSIH_ROUND_PI */ NULL, 5184 /* WINED3DSIH_ROUND_Z */ NULL, 5185 /* WINED3DSIH_RSQ */ shader_hw_scalar_op, 5186 /* WINED3DSIH_SAMPLE */ NULL, 5187 /* WINED3DSIH_SAMPLE_B */ NULL, 5188 /* WINED3DSIH_SAMPLE_C */ NULL, 5189 /* WINED3DSIH_SAMPLE_C_LZ */ NULL, 5190 /* WINED3DSIH_SAMPLE_GRAD */ NULL, 5191 /* WINED3DSIH_SAMPLE_INFO */ NULL, 5192 /* WINED3DSIH_SAMPLE_LOD */ NULL, 5193 /* WINED3DSIH_SAMPLE_POS */ NULL, 5194 /* WINED3DSIH_SETP */ NULL, 5195 /* WINED3DSIH_SGE */ shader_hw_map2gl, 5196 /* WINED3DSIH_SGN */ shader_hw_sgn, 5197 /* WINED3DSIH_SINCOS */ shader_hw_sincos, 5198 /* WINED3DSIH_SLT */ shader_hw_map2gl, 5199 /* WINED3DSIH_SQRT */ NULL, 5200 /* WINED3DSIH_STORE_RAW */ NULL, 5201 /* WINED3DSIH_STORE_STRUCTURED */ NULL, 5202 /* WINED3DSIH_STORE_UAV_TYPED */ NULL, 5203 /* WINED3DSIH_SUB */ shader_hw_map2gl, 5204 /* WINED3DSIH_SWAPC */ NULL, 5205 /* WINED3DSIH_SWITCH */ NULL, 5206 /* WINED3DSIH_SYNC */ NULL, 5207 /* WINED3DSIH_TEX */ pshader_hw_tex, 5208 /* WINED3DSIH_TEXBEM */ pshader_hw_texbem, 5209 /* WINED3DSIH_TEXBEML */ pshader_hw_texbem, 5210 /* WINED3DSIH_TEXCOORD */ pshader_hw_texcoord, 5211 /* WINED3DSIH_TEXDEPTH */ pshader_hw_texdepth, 5212 /* WINED3DSIH_TEXDP3 */ pshader_hw_texdp3, 5213 /* WINED3DSIH_TEXDP3TEX */ pshader_hw_texdp3tex, 5214 /* WINED3DSIH_TEXKILL 
*/ pshader_hw_texkill, 5215 /* WINED3DSIH_TEXLDD */ shader_hw_texldd, 5216 /* WINED3DSIH_TEXLDL */ shader_hw_texldl, 5217 /* WINED3DSIH_TEXM3x2DEPTH */ pshader_hw_texm3x2depth, 5218 /* WINED3DSIH_TEXM3x2PAD */ pshader_hw_texm3x2pad, 5219 /* WINED3DSIH_TEXM3x2TEX */ pshader_hw_texm3x2tex, 5220 /* WINED3DSIH_TEXM3x3 */ pshader_hw_texm3x3, 5221 /* WINED3DSIH_TEXM3x3DIFF */ NULL, 5222 /* WINED3DSIH_TEXM3x3PAD */ pshader_hw_texm3x3pad, 5223 /* WINED3DSIH_TEXM3x3SPEC */ pshader_hw_texm3x3spec, 5224 /* WINED3DSIH_TEXM3x3TEX */ pshader_hw_texm3x3tex, 5225 /* WINED3DSIH_TEXM3x3VSPEC */ pshader_hw_texm3x3vspec, 5226 /* WINED3DSIH_TEXREG2AR */ pshader_hw_texreg2ar, 5227 /* WINED3DSIH_TEXREG2GB */ pshader_hw_texreg2gb, 5228 /* WINED3DSIH_TEXREG2RGB */ pshader_hw_texreg2rgb, 5229 /* WINED3DSIH_UBFE */ NULL, 5230 /* WINED3DSIH_UDIV */ NULL, 5231 /* WINED3DSIH_UGE */ NULL, 5232 /* WINED3DSIH_ULT */ NULL, 5233 /* WINED3DSIH_UMAX */ NULL, 5234 /* WINED3DSIH_UMIN */ NULL, 5235 /* WINED3DSIH_UMUL */ NULL, 5236 /* WINED3DSIH_USHR */ NULL, 5237 /* WINED3DSIH_UTOF */ NULL, 5238 /* WINED3DSIH_XOR */ NULL, 5239 }; 5240 5241 static BOOL get_bool_const(const struct wined3d_shader_instruction *ins, 5242 const struct wined3d_shader *shader, DWORD idx) 5243 { 5244 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 5245 BOOL vshader = shader_is_vshader_version(reg_maps->shader_version.type); 5246 const struct wined3d_shader_lconst *constant; 5247 WORD bools = 0; 5248 WORD flag = (1u << idx); 5249 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5250 5251 if (reg_maps->local_bool_consts & flag) 5252 { 5253 /* What good is an if(bool) with a hardcoded local constant? 
I don't know, but handle it */ 5254 LIST_FOR_EACH_ENTRY(constant, &shader->constantsB, struct wined3d_shader_lconst, entry) 5255 { 5256 if (constant->idx == idx) 5257 { 5258 return constant->value[0]; 5259 } 5260 } 5261 ERR("Local constant not found\n"); 5262 return FALSE; 5263 } 5264 else 5265 { 5266 if(vshader) bools = priv->cur_vs_args->clip.boolclip.bools; 5267 else bools = priv->cur_ps_args->bools; 5268 return bools & flag; 5269 } 5270 } 5271 5272 static void get_loop_control_const(const struct wined3d_shader_instruction *ins, 5273 const struct wined3d_shader *shader, UINT idx, struct wined3d_shader_loop_control *loop_control) 5274 { 5275 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 5276 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5277 5278 /* Integer constants can either be a local constant, or they can be stored in the shader 5279 * type specific compile args. */ 5280 if (reg_maps->local_int_consts & (1u << idx)) 5281 { 5282 const struct wined3d_shader_lconst *constant; 5283 5284 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 5285 { 5286 if (constant->idx == idx) 5287 { 5288 loop_control->count = constant->value[0]; 5289 loop_control->start = constant->value[1]; 5290 /* Step is signed. */ 5291 loop_control->step = (int)constant->value[2]; 5292 return; 5293 } 5294 } 5295 /* If this happens the flag was set incorrectly */ 5296 ERR("Local constant not found\n"); 5297 loop_control->count = 0; 5298 loop_control->start = 0; 5299 loop_control->step = 0; 5300 return; 5301 } 5302 5303 switch (reg_maps->shader_version.type) 5304 { 5305 case WINED3D_SHADER_TYPE_VERTEX: 5306 /* Count and aL start value are unsigned */ 5307 loop_control->count = priv->cur_vs_args->loop_ctrl[idx][0]; 5308 loop_control->start = priv->cur_vs_args->loop_ctrl[idx][1]; 5309 /* Step is signed. 
*/ 5310 loop_control->step = ((char)priv->cur_vs_args->loop_ctrl[idx][2]); 5311 break; 5312 5313 case WINED3D_SHADER_TYPE_PIXEL: 5314 loop_control->count = priv->cur_ps_args->loop_ctrl[idx][0]; 5315 loop_control->start = priv->cur_ps_args->loop_ctrl[idx][1]; 5316 loop_control->step = ((char)priv->cur_ps_args->loop_ctrl[idx][2]); 5317 break; 5318 5319 default: 5320 FIXME("Unhandled shader type %#x.\n", reg_maps->shader_version.type); 5321 break; 5322 } 5323 } 5324 5325 static void record_instruction(struct list *list, const struct wined3d_shader_instruction *ins) 5326 { 5327 struct wined3d_shader_src_param *src_param = NULL, *rel_addr; 5328 struct wined3d_shader_dst_param *dst_param; 5329 struct recorded_instruction *rec; 5330 unsigned int i; 5331 5332 if (!(rec = heap_alloc_zero(sizeof(*rec)))) 5333 { 5334 ERR("Out of memory\n"); 5335 return; 5336 } 5337 5338 rec->ins = *ins; 5339 if (!(dst_param = heap_alloc(sizeof(*dst_param)))) 5340 goto free; 5341 *dst_param = *ins->dst; 5342 if (ins->dst->reg.idx[0].rel_addr) 5343 { 5344 if (!(rel_addr = heap_alloc(sizeof(*rel_addr)))) 5345 goto free; 5346 *rel_addr = *ins->dst->reg.idx[0].rel_addr; 5347 dst_param->reg.idx[0].rel_addr = rel_addr; 5348 } 5349 rec->ins.dst = dst_param; 5350 5351 if (!(src_param = heap_calloc(ins->src_count, sizeof(*src_param)))) 5352 goto free; 5353 for (i = 0; i < ins->src_count; ++i) 5354 { 5355 src_param[i] = ins->src[i]; 5356 if (ins->src[i].reg.idx[0].rel_addr) 5357 { 5358 if (!(rel_addr = heap_alloc(sizeof(*rel_addr)))) 5359 goto free; 5360 *rel_addr = *ins->src[i].reg.idx[0].rel_addr; 5361 src_param[i].reg.idx[0].rel_addr = rel_addr; 5362 } 5363 } 5364 rec->ins.src = src_param; 5365 list_add_tail(list, &rec->entry); 5366 return; 5367 5368 free: 5369 ERR("Out of memory\n"); 5370 if (dst_param) 5371 { 5372 heap_free((void *)dst_param->reg.idx[0].rel_addr); 5373 heap_free(dst_param); 5374 } 5375 if (src_param) 5376 { 5377 for (i = 0; i < ins->src_count; ++i) 5378 { 5379 heap_free((void 
*)src_param[i].reg.idx[0].rel_addr); 5380 } 5381 heap_free(src_param); 5382 } 5383 heap_free(rec); 5384 } 5385 5386 static void free_recorded_instruction(struct list *list) 5387 { 5388 struct recorded_instruction *rec_ins, *entry2; 5389 unsigned int i; 5390 5391 LIST_FOR_EACH_ENTRY_SAFE(rec_ins, entry2, list, struct recorded_instruction, entry) 5392 { 5393 list_remove(&rec_ins->entry); 5394 if (rec_ins->ins.dst) 5395 { 5396 heap_free((void *)rec_ins->ins.dst->reg.idx[0].rel_addr); 5397 heap_free((void *)rec_ins->ins.dst); 5398 } 5399 if (rec_ins->ins.src) 5400 { 5401 for (i = 0; i < rec_ins->ins.src_count; ++i) 5402 { 5403 heap_free((void *)rec_ins->ins.src[i].reg.idx[0].rel_addr); 5404 } 5405 heap_free((void *)rec_ins->ins.src); 5406 } 5407 heap_free(rec_ins); 5408 } 5409 } 5410 5411 static void pop_control_frame(const struct wined3d_shader_instruction *ins) 5412 { 5413 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5414 struct control_frame *control_frame; 5415 5416 if (ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 5417 { 5418 struct list *e = list_head(&priv->control_frames); 5419 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5420 list_remove(&control_frame->entry); 5421 heap_free(control_frame); 5422 priv->loop_depth--; 5423 } 5424 else if (ins->handler_idx == WINED3DSIH_ENDIF) 5425 { 5426 /* Non-ifc ENDIFs were already handled previously. 
*/ 5427 struct list *e = list_head(&priv->control_frames); 5428 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5429 list_remove(&control_frame->entry); 5430 heap_free(control_frame); 5431 } 5432 } 5433 5434 static void shader_arb_handle_instruction(const struct wined3d_shader_instruction *ins) { 5435 SHADER_HANDLER hw_fct; 5436 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5437 const struct wined3d_shader *shader = ins->ctx->shader; 5438 struct control_frame *control_frame; 5439 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 5440 BOOL bool_const; 5441 5442 if(ins->handler_idx == WINED3DSIH_LOOP || ins->handler_idx == WINED3DSIH_REP) 5443 { 5444 control_frame = heap_alloc_zero(sizeof(*control_frame)); 5445 list_add_head(&priv->control_frames, &control_frame->entry); 5446 5447 if(ins->handler_idx == WINED3DSIH_LOOP) control_frame->type = LOOP; 5448 if(ins->handler_idx == WINED3DSIH_REP) control_frame->type = REP; 5449 5450 if(priv->target_version >= NV2) 5451 { 5452 control_frame->no.loop = priv->num_loops++; 5453 priv->loop_depth++; 5454 } 5455 else 5456 { 5457 /* Don't bother recording when we're in a not used if branch */ 5458 if(priv->muted) 5459 { 5460 return; 5461 } 5462 5463 if(!priv->recording) 5464 { 5465 list_init(&priv->record); 5466 priv->recording = TRUE; 5467 control_frame->outer_loop = TRUE; 5468 get_loop_control_const(ins, shader, ins->src[0].reg.idx[0].offset, &control_frame->loop_control); 5469 return; /* Instruction is handled */ 5470 } 5471 /* Record this loop in the outer loop's recording */ 5472 } 5473 } 5474 else if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 5475 { 5476 if(priv->target_version >= NV2) 5477 { 5478 /* Nothing to do. 
The control frame is popped after the HW instr handler */ 5479 } 5480 else 5481 { 5482 struct list *e = list_head(&priv->control_frames); 5483 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5484 list_remove(&control_frame->entry); 5485 5486 if(control_frame->outer_loop) 5487 { 5488 unsigned int iteration; 5489 int aL = 0; 5490 struct list copy; 5491 5492 /* Turn off recording before playback */ 5493 priv->recording = FALSE; 5494 5495 /* Move the recorded instructions to a separate list and get them out of the private data 5496 * structure. If there are nested loops, the shader_arb_handle_instruction below will 5497 * be recorded again, thus priv->record might be overwritten 5498 */ 5499 list_init(©); 5500 list_move_tail(©, &priv->record); 5501 list_init(&priv->record); 5502 5503 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5504 { 5505 shader_addline(buffer, "#unrolling loop: %u iterations, aL=%u, inc %d\n", 5506 control_frame->loop_control.count, control_frame->loop_control.start, 5507 control_frame->loop_control.step); 5508 aL = control_frame->loop_control.start; 5509 } 5510 else 5511 { 5512 shader_addline(buffer, "#unrolling rep: %u iterations\n", control_frame->loop_control.count); 5513 } 5514 5515 for (iteration = 0; iteration < control_frame->loop_control.count; ++iteration) 5516 { 5517 struct recorded_instruction *rec_ins; 5518 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5519 { 5520 priv->aL = aL; 5521 shader_addline(buffer, "#Iteration %u, aL=%d\n", iteration, aL); 5522 } 5523 else 5524 { 5525 shader_addline(buffer, "#Iteration %u\n", iteration); 5526 } 5527 5528 LIST_FOR_EACH_ENTRY(rec_ins, ©, struct recorded_instruction, entry) 5529 { 5530 shader_arb_handle_instruction(&rec_ins->ins); 5531 } 5532 5533 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5534 { 5535 aL += control_frame->loop_control.step; 5536 } 5537 } 5538 shader_addline(buffer, "#end loop/rep\n"); 5539 5540 free_recorded_instruction(©); 5541 heap_free(control_frame); 5542 return; /* 
Instruction is handled */ 5543 } 5544 else 5545 { 5546 /* This is a nested loop. Proceed to the normal recording function */ 5547 heap_free(control_frame); 5548 } 5549 } 5550 } 5551 5552 if(priv->recording) 5553 { 5554 record_instruction(&priv->record, ins); 5555 return; 5556 } 5557 5558 /* boolean if */ 5559 if(ins->handler_idx == WINED3DSIH_IF) 5560 { 5561 control_frame = heap_alloc_zero(sizeof(*control_frame)); 5562 list_add_head(&priv->control_frames, &control_frame->entry); 5563 control_frame->type = IF; 5564 5565 bool_const = get_bool_const(ins, shader, ins->src[0].reg.idx[0].offset); 5566 if (ins->src[0].modifiers == WINED3DSPSM_NOT) 5567 bool_const = !bool_const; 5568 if (!priv->muted && !bool_const) 5569 { 5570 shader_addline(buffer, "#if(FALSE){\n"); 5571 priv->muted = TRUE; 5572 control_frame->muting = TRUE; 5573 } 5574 else shader_addline(buffer, "#if(TRUE) {\n"); 5575 5576 return; /* Instruction is handled */ 5577 } 5578 else if(ins->handler_idx == WINED3DSIH_IFC) 5579 { 5580 /* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */ 5581 control_frame = heap_alloc_zero(sizeof(*control_frame)); 5582 control_frame->type = IFC; 5583 control_frame->no.ifc = priv->num_ifcs++; 5584 list_add_head(&priv->control_frames, &control_frame->entry); 5585 } 5586 else if(ins->handler_idx == WINED3DSIH_ELSE) 5587 { 5588 struct list *e = list_head(&priv->control_frames); 5589 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5590 5591 if(control_frame->type == IF) 5592 { 5593 shader_addline(buffer, "#} else {\n"); 5594 if(!priv->muted && !control_frame->muting) 5595 { 5596 priv->muted = TRUE; 5597 control_frame->muting = TRUE; 5598 } 5599 else if(control_frame->muting) priv->muted = FALSE; 5600 return; /* Instruction is handled. 
*/ 5601 } 5602 /* In case of an ifc, generate a HW shader instruction */ 5603 if (control_frame->type != IFC) 5604 ERR("Control frame does not match.\n"); 5605 } 5606 else if(ins->handler_idx == WINED3DSIH_ENDIF) 5607 { 5608 struct list *e = list_head(&priv->control_frames); 5609 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5610 5611 if(control_frame->type == IF) 5612 { 5613 shader_addline(buffer, "#} endif\n"); 5614 if(control_frame->muting) priv->muted = FALSE; 5615 list_remove(&control_frame->entry); 5616 heap_free(control_frame); 5617 return; /* Instruction is handled */ 5618 } 5619 /* In case of an ifc, generate a HW shader instruction */ 5620 if (control_frame->type != IFC) 5621 ERR("Control frame does not match.\n"); 5622 } 5623 5624 if(priv->muted) 5625 { 5626 pop_control_frame(ins); 5627 return; 5628 } 5629 5630 /* Select handler */ 5631 hw_fct = shader_arb_instruction_handler_table[ins->handler_idx]; 5632 5633 /* Unhandled opcode */ 5634 if (!hw_fct) 5635 { 5636 FIXME("Backend can't handle opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 5637 return; 5638 } 5639 hw_fct(ins); 5640 5641 pop_control_frame(ins); 5642 5643 shader_arb_add_instruction_modifiers(ins); 5644 } 5645 5646 static BOOL shader_arb_has_ffp_proj_control(void *shader_priv) 5647 { 5648 struct shader_arb_priv *priv = shader_priv; 5649 5650 return priv->ffp_proj_control; 5651 } 5652 5653 static void shader_arb_precompile(void *shader_priv, struct wined3d_shader *shader) {} 5654 5655 const struct wined3d_shader_backend_ops arb_program_shader_backend = 5656 { 5657 shader_arb_handle_instruction, 5658 shader_arb_precompile, 5659 shader_arb_select, 5660 shader_arb_select_compute, 5661 shader_arb_disable, 5662 shader_arb_update_float_vertex_constants, 5663 shader_arb_update_float_pixel_constants, 5664 shader_arb_load_constants, 5665 shader_arb_destroy, 5666 shader_arb_alloc, 5667 shader_arb_free, 5668 shader_arb_allocate_context_data, 5669 
shader_arb_free_context_data, 5670 shader_arb_init_context_state, 5671 shader_arb_get_caps, 5672 shader_arb_color_fixup_supported, 5673 shader_arb_has_ffp_proj_control, 5674 }; 5675 5676 /* ARB_fragment_program fixed function pipeline replacement definitions */ 5677 #define ARB_FFP_CONST_TFACTOR 0 5678 #define ARB_FFP_CONST_COLOR_KEY_LOW ((ARB_FFP_CONST_TFACTOR) + 1) 5679 #define ARB_FFP_CONST_COLOR_KEY_HIGH ((ARB_FFP_CONST_COLOR_KEY_LOW) + 1) 5680 #define ARB_FFP_CONST_SPECULAR_ENABLE ((ARB_FFP_CONST_COLOR_KEY_HIGH) + 1) 5681 #define ARB_FFP_CONST_CONSTANT(i) ((ARB_FFP_CONST_SPECULAR_ENABLE) + 1 + i) 5682 #define ARB_FFP_CONST_BUMPMAT(i) ((ARB_FFP_CONST_CONSTANT(7)) + 1 + i) 5683 #define ARB_FFP_CONST_LUMINANCE(i) ((ARB_FFP_CONST_BUMPMAT(7)) + 1 + i) 5684 5685 struct arbfp_ffp_desc 5686 { 5687 struct ffp_frag_desc parent; 5688 GLuint shader; 5689 }; 5690 5691 /* Context activation is done by the caller. */ 5692 static void arbfp_enable(const struct wined3d_gl_info *gl_info, BOOL enable) 5693 { 5694 if (enable) 5695 { 5696 gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB); 5697 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 5698 } 5699 else 5700 { 5701 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 5702 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 5703 } 5704 } 5705 5706 static void *arbfp_alloc(const struct wined3d_shader_backend_ops *shader_backend, void *shader_priv) 5707 { 5708 struct shader_arb_priv *priv; 5709 5710 /* Share private data between the shader backend and the pipeline 5711 * replacement, if both are the arb implementation. This is needed to 5712 * figure out whether ARBfp should be disabled if no pixel shader is bound 5713 * or not. 
*/ 5714 if (shader_backend == &arb_program_shader_backend) 5715 priv = shader_priv; 5716 else if (!(priv = heap_alloc_zero(sizeof(*priv)))) 5717 return NULL; 5718 5719 wine_rb_init(&priv->fragment_shaders, wined3d_ffp_frag_program_key_compare); 5720 priv->use_arbfp_fixed_func = TRUE; 5721 5722 return priv; 5723 } 5724 5725 /* Context activation is done by the caller. */ 5726 static void arbfp_free_ffpshader(struct wine_rb_entry *entry, void *context) 5727 { 5728 const struct wined3d_gl_info *gl_info = context; 5729 struct arbfp_ffp_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_ffp_desc, parent.entry); 5730 5731 GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader)); 5732 checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)"); 5733 heap_free(entry_arb); 5734 } 5735 5736 /* Context activation is done by the caller. */ 5737 static void arbfp_free(struct wined3d_device *device) 5738 { 5739 struct shader_arb_priv *priv = device->fragment_priv; 5740 5741 wine_rb_destroy(&priv->fragment_shaders, arbfp_free_ffpshader, &device->adapter->gl_info); 5742 priv->use_arbfp_fixed_func = FALSE; 5743 5744 if (device->shader_backend != &arb_program_shader_backend) 5745 heap_free(device->fragment_priv); 5746 } 5747 5748 static void arbfp_get_caps(const struct wined3d_gl_info *gl_info, struct fragment_caps *caps) 5749 { 5750 caps->wined3d_caps = WINED3D_FRAGMENT_CAP_PROJ_CONTROL 5751 | WINED3D_FRAGMENT_CAP_SRGB_WRITE 5752 | WINED3D_FRAGMENT_CAP_COLOR_KEY; 5753 caps->PrimitiveMiscCaps = WINED3DPMISCCAPS_TSSARGTEMP; 5754 caps->TextureOpCaps = WINED3DTEXOPCAPS_DISABLE | 5755 WINED3DTEXOPCAPS_SELECTARG1 | 5756 WINED3DTEXOPCAPS_SELECTARG2 | 5757 WINED3DTEXOPCAPS_MODULATE4X | 5758 WINED3DTEXOPCAPS_MODULATE2X | 5759 WINED3DTEXOPCAPS_MODULATE | 5760 WINED3DTEXOPCAPS_ADDSIGNED2X | 5761 WINED3DTEXOPCAPS_ADDSIGNED | 5762 WINED3DTEXOPCAPS_ADD | 5763 WINED3DTEXOPCAPS_SUBTRACT | 5764 WINED3DTEXOPCAPS_ADDSMOOTH | 5765 WINED3DTEXOPCAPS_BLENDCURRENTALPHA | 5766 
WINED3DTEXOPCAPS_BLENDFACTORALPHA | 5767 WINED3DTEXOPCAPS_BLENDTEXTUREALPHA | 5768 WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA | 5769 WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM | 5770 WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR | 5771 WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA | 5772 WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA | 5773 WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR | 5774 WINED3DTEXOPCAPS_DOTPRODUCT3 | 5775 WINED3DTEXOPCAPS_MULTIPLYADD | 5776 WINED3DTEXOPCAPS_LERP | 5777 WINED3DTEXOPCAPS_BUMPENVMAP | 5778 WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE; 5779 5780 /* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */ 5781 5782 caps->MaxTextureBlendStages = MAX_TEXTURES; 5783 caps->MaxSimultaneousTextures = min(gl_info->limits.samplers[WINED3D_SHADER_TYPE_PIXEL], MAX_TEXTURES); 5784 } 5785 5786 static DWORD arbfp_get_emul_mask(const struct wined3d_gl_info *gl_info) 5787 { 5788 return GL_EXT_EMUL_ARB_MULTITEXTURE | GL_EXT_EMUL_EXT_FOG_COORD; 5789 } 5790 5791 static void state_texfactor_arbfp(struct wined3d_context *context, 5792 const struct wined3d_state *state, DWORD state_id) 5793 { 5794 const struct wined3d_gl_info *gl_info = context->gl_info; 5795 struct wined3d_device *device = context->device; 5796 struct wined3d_color color; 5797 5798 if (device->shader_backend == &arb_program_shader_backend) 5799 { 5800 struct shader_arb_priv *priv; 5801 5802 /* Don't load the parameter if we're using an arbfp pixel shader, 5803 * otherwise we'll overwrite application provided constants. 
*/ 5804 if (use_ps(state)) 5805 return; 5806 5807 priv = device->shader_priv; 5808 priv->pshader_const_dirty[ARB_FFP_CONST_TFACTOR] = 1; 5809 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_TFACTOR + 1); 5810 } 5811 5812 wined3d_color_from_d3dcolor(&color, state->render_states[WINED3D_RS_TEXTUREFACTOR]); 5813 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r)); 5814 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r)"); 5815 } 5816 5817 static void state_tss_constant_arbfp(struct wined3d_context *context, 5818 const struct wined3d_state *state, DWORD state_id) 5819 { 5820 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5821 const struct wined3d_gl_info *gl_info = context->gl_info; 5822 struct wined3d_device *device = context->device; 5823 struct wined3d_color color; 5824 5825 if (device->shader_backend == &arb_program_shader_backend) 5826 { 5827 struct shader_arb_priv *priv; 5828 5829 /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite 5830 * application provided constants. 
5831 */ 5832 if (use_ps(state)) 5833 return; 5834 5835 priv = device->shader_priv; 5836 priv->pshader_const_dirty[ARB_FFP_CONST_CONSTANT(stage)] = 1; 5837 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_CONSTANT(stage) + 1); 5838 } 5839 5840 wined3d_color_from_d3dcolor(&color, state->texture_states[stage][WINED3D_TSS_CONSTANT]); 5841 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_CONSTANT(stage), &color.r)); 5842 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_CONSTANT(stage), &color.r)"); 5843 } 5844 5845 static void state_arb_specularenable(struct wined3d_context *context, 5846 const struct wined3d_state *state, DWORD state_id) 5847 { 5848 const struct wined3d_gl_info *gl_info = context->gl_info; 5849 struct wined3d_device *device = context->device; 5850 float col[4]; 5851 5852 if (device->shader_backend == &arb_program_shader_backend) 5853 { 5854 struct shader_arb_priv *priv; 5855 5856 /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite 5857 * application provided constants. 
5858 */ 5859 if (use_ps(state)) 5860 return; 5861 5862 priv = device->shader_priv; 5863 priv->pshader_const_dirty[ARB_FFP_CONST_SPECULAR_ENABLE] = 1; 5864 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_SPECULAR_ENABLE + 1); 5865 } 5866 5867 if (state->render_states[WINED3D_RS_SPECULARENABLE]) 5868 { 5869 /* The specular color has no alpha */ 5870 col[0] = 1.0f; col[1] = 1.0f; 5871 col[2] = 1.0f; col[3] = 0.0f; 5872 } else { 5873 col[0] = 0.0f; col[1] = 0.0f; 5874 col[2] = 0.0f; col[3] = 0.0f; 5875 } 5876 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)); 5877 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)"); 5878 } 5879 5880 static void set_bumpmat_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 5881 { 5882 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5883 const struct wined3d_gl_info *gl_info = context->gl_info; 5884 struct wined3d_device *device = context->device; 5885 float mat[2][2]; 5886 5887 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV; 5888 5889 if (device->shader_backend == &arb_program_shader_backend) 5890 { 5891 struct shader_arb_priv *priv = device->shader_priv; 5892 5893 /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants. 
*/ 5894 if (use_ps(state)) 5895 return; 5896 5897 priv->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1; 5898 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1); 5899 } 5900 5901 mat[0][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT00]); 5902 mat[0][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT01]); 5903 mat[1][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT10]); 5904 mat[1][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT11]); 5905 5906 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])); 5907 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])"); 5908 } 5909 5910 static void tex_bumpenvlum_arbfp(struct wined3d_context *context, 5911 const struct wined3d_state *state, DWORD state_id) 5912 { 5913 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5914 const struct wined3d_gl_info *gl_info = context->gl_info; 5915 struct wined3d_device *device = context->device; 5916 float param[4]; 5917 5918 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV; 5919 5920 if (device->shader_backend == &arb_program_shader_backend) 5921 { 5922 struct shader_arb_priv *priv = device->shader_priv; 5923 5924 /* Exit now, don't set the luminance below, otherwise we may overwrite pixel shader constants. 
*/ 5925 if (use_ps(state)) 5926 return; 5927 5928 priv->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1; 5929 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1); 5930 } 5931 5932 param[0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LSCALE]); 5933 param[1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LOFFSET]); 5934 param[2] = 0.0f; 5935 param[3] = 0.0f; 5936 5937 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)); 5938 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)"); 5939 } 5940 5941 static void alpha_test_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 5942 { 5943 const struct wined3d_gl_info *gl_info = context->gl_info; 5944 int glParm; 5945 float ref; 5946 5947 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 5948 5949 if (state->render_states[WINED3D_RS_ALPHATESTENABLE]) 5950 { 5951 gl_info->gl_ops.gl.p_glEnable(GL_ALPHA_TEST); 5952 checkGLcall("glEnable GL_ALPHA_TEST"); 5953 } 5954 else 5955 { 5956 gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST); 5957 checkGLcall("glDisable GL_ALPHA_TEST"); 5958 return; 5959 } 5960 5961 ref = ((float)state->render_states[WINED3D_RS_ALPHAREF]) / 255.0f; 5962 glParm = wined3d_gl_compare_func(state->render_states[WINED3D_RS_ALPHAFUNC]); 5963 5964 if (glParm) 5965 { 5966 gl_info->gl_ops.gl.p_glAlphaFunc(glParm, ref); 5967 checkGLcall("glAlphaFunc"); 5968 } 5969 } 5970 5971 static void color_key_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 5972 { 5973 const struct wined3d_texture *texture = state->textures[0]; 5974 const struct wined3d_gl_info *gl_info = context->gl_info; 5975 struct wined3d_device *device = context->device; 5976 struct wined3d_color float_key[2]; 5977 5978 if (!texture) 5979 return; 5980 5981 if 
(device->shader_backend == &arb_program_shader_backend) 5982 { 5983 struct shader_arb_priv *priv; 5984 5985 /* Don't load the parameter if we're using an arbfp pixel shader, 5986 * otherwise we'll overwrite application provided constants. */ 5987 if (use_ps(state)) 5988 return; 5989 5990 priv = device->shader_priv; 5991 priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_LOW] = 1; 5992 priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_HIGH] = 1; 5993 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_COLOR_KEY_HIGH + 1); 5994 } 5995 5996 wined3d_format_get_float_color_key(texture->resource.format, &texture->async.src_blt_color_key, float_key); 5997 5998 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r)); 5999 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r)"); 6000 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r)); 6001 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r)"); 6002 } 6003 6004 static const char *get_argreg(struct wined3d_string_buffer *buffer, DWORD argnum, unsigned int stage, DWORD arg) 6005 { 6006 const char *ret; 6007 6008 if(arg == ARG_UNUSED) return "unused"; /* This is the marker for unused registers */ 6009 6010 switch(arg & WINED3DTA_SELECTMASK) { 6011 case WINED3DTA_DIFFUSE: 6012 ret = "fragment.color.primary"; break; 6013 6014 case WINED3DTA_CURRENT: 6015 ret = "ret"; 6016 break; 6017 6018 case WINED3DTA_TEXTURE: 6019 switch(stage) { 6020 case 0: ret = "tex0"; break; 6021 case 1: ret = "tex1"; break; 6022 case 2: ret = "tex2"; break; 6023 case 3: ret = "tex3"; break; 6024 case 4: ret = "tex4"; break; 6025 case 5: ret = "tex5"; break; 6026 case 6: ret = "tex6"; break; 6027 case 7: ret = "tex7"; break; 6028 default: ret = "unknown texture"; 6029 } 6030 break; 6031 6032 case 
WINED3DTA_TFACTOR: 6033 ret = "tfactor"; break; 6034 6035 case WINED3DTA_SPECULAR: 6036 ret = "fragment.color.secondary"; break; 6037 6038 case WINED3DTA_TEMP: 6039 ret = "tempreg"; break; 6040 6041 case WINED3DTA_CONSTANT: 6042 switch(stage) { 6043 case 0: ret = "const0"; break; 6044 case 1: ret = "const1"; break; 6045 case 2: ret = "const2"; break; 6046 case 3: ret = "const3"; break; 6047 case 4: ret = "const4"; break; 6048 case 5: ret = "const5"; break; 6049 case 6: ret = "const6"; break; 6050 case 7: ret = "const7"; break; 6051 default: ret = "unknown constant"; 6052 } 6053 break; 6054 6055 default: 6056 return "unknown"; 6057 } 6058 6059 if(arg & WINED3DTA_COMPLEMENT) { 6060 shader_addline(buffer, "SUB arg%u, const.x, %s;\n", argnum, ret); 6061 if(argnum == 0) ret = "arg0"; 6062 if(argnum == 1) ret = "arg1"; 6063 if(argnum == 2) ret = "arg2"; 6064 } 6065 if(arg & WINED3DTA_ALPHAREPLICATE) { 6066 shader_addline(buffer, "MOV arg%u, %s.w;\n", argnum, ret); 6067 if(argnum == 0) ret = "arg0"; 6068 if(argnum == 1) ret = "arg1"; 6069 if(argnum == 2) ret = "arg2"; 6070 } 6071 return ret; 6072 } 6073 6074 static void gen_ffp_instr(struct wined3d_string_buffer *buffer, unsigned int stage, BOOL color, 6075 BOOL alpha, BOOL tmp_dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2) 6076 { 6077 const char *dstmask, *dstreg, *arg0, *arg1, *arg2; 6078 unsigned int mul = 1; 6079 6080 if (color && alpha) 6081 dstmask = ""; 6082 else if (color) 6083 dstmask = ".xyz"; 6084 else 6085 dstmask = ".w"; 6086 6087 dstreg = tmp_dst ? 
"tempreg" : "ret"; 6088 6089 arg0 = get_argreg(buffer, 0, stage, dw_arg0); 6090 arg1 = get_argreg(buffer, 1, stage, dw_arg1); 6091 arg2 = get_argreg(buffer, 2, stage, dw_arg2); 6092 6093 switch (op) 6094 { 6095 case WINED3D_TOP_DISABLE: 6096 break; 6097 6098 case WINED3D_TOP_SELECT_ARG2: 6099 arg1 = arg2; 6100 /* FALLTHROUGH */ 6101 case WINED3D_TOP_SELECT_ARG1: 6102 shader_addline(buffer, "MOV %s%s, %s;\n", dstreg, dstmask, arg1); 6103 break; 6104 6105 case WINED3D_TOP_MODULATE_4X: 6106 mul = 2; 6107 /* FALLTHROUGH */ 6108 case WINED3D_TOP_MODULATE_2X: 6109 mul *= 2; 6110 /* FALLTHROUGH */ 6111 case WINED3D_TOP_MODULATE: 6112 shader_addline(buffer, "MUL %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 6113 break; 6114 6115 case WINED3D_TOP_ADD_SIGNED_2X: 6116 mul = 2; 6117 /* FALLTHROUGH */ 6118 case WINED3D_TOP_ADD_SIGNED: 6119 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 6120 arg2 = "arg2"; 6121 /* FALLTHROUGH */ 6122 case WINED3D_TOP_ADD: 6123 shader_addline(buffer, "ADD_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 6124 break; 6125 6126 case WINED3D_TOP_SUBTRACT: 6127 shader_addline(buffer, "SUB_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 6128 break; 6129 6130 case WINED3D_TOP_ADD_SMOOTH: 6131 shader_addline(buffer, "SUB arg1, const.x, %s;\n", arg1); 6132 shader_addline(buffer, "MAD_SAT %s%s, arg1, %s, %s;\n", dstreg, dstmask, arg2, arg1); 6133 break; 6134 6135 case WINED3D_TOP_BLEND_CURRENT_ALPHA: 6136 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_CURRENT); 6137 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6138 break; 6139 case WINED3D_TOP_BLEND_FACTOR_ALPHA: 6140 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TFACTOR); 6141 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6142 break; 6143 case WINED3D_TOP_BLEND_TEXTURE_ALPHA: 6144 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 6145 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, 
dstmask, arg0, arg1, arg2); 6146 break; 6147 case WINED3D_TOP_BLEND_DIFFUSE_ALPHA: 6148 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_DIFFUSE); 6149 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6150 break; 6151 6152 case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM: 6153 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 6154 shader_addline(buffer, "SUB arg0.w, const.x, %s.w;\n", arg0); 6155 shader_addline(buffer, "MAD_SAT %s%s, %s, arg0.w, %s;\n", dstreg, dstmask, arg2, arg1); 6156 break; 6157 6158 /* D3DTOP_PREMODULATE ???? */ 6159 6160 case WINED3D_TOP_MODULATE_INVALPHA_ADD_COLOR: 6161 shader_addline(buffer, "SUB arg0.w, const.x, %s;\n", arg1); 6162 shader_addline(buffer, "MAD_SAT %s%s, arg0.w, %s, %s;\n", dstreg, dstmask, arg2, arg1); 6163 break; 6164 case WINED3D_TOP_MODULATE_ALPHA_ADD_COLOR: 6165 shader_addline(buffer, "MAD_SAT %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg1); 6166 break; 6167 case WINED3D_TOP_MODULATE_INVCOLOR_ADD_ALPHA: 6168 shader_addline(buffer, "SUB arg0, const.x, %s;\n", arg1); 6169 shader_addline(buffer, "MAD_SAT %s%s, arg0, %s, %s.w;\n", dstreg, dstmask, arg2, arg1); 6170 break; 6171 case WINED3D_TOP_MODULATE_COLOR_ADD_ALPHA: 6172 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s.w;\n", dstreg, dstmask, arg1, arg2, arg1); 6173 break; 6174 6175 case WINED3D_TOP_DOTPRODUCT3: 6176 mul = 4; 6177 shader_addline(buffer, "SUB arg1, %s, const.w;\n", arg1); 6178 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 6179 shader_addline(buffer, "DP3_SAT %s%s, arg1, arg2;\n", dstreg, dstmask); 6180 break; 6181 6182 case WINED3D_TOP_MULTIPLY_ADD: 6183 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg0); 6184 break; 6185 6186 case WINED3D_TOP_LERP: 6187 /* The msdn is not quite right here */ 6188 shader_addline(buffer, "LRP %s%s, %s, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6189 break; 6190 6191 case WINED3D_TOP_BUMPENVMAP: 6192 case 
WINED3D_TOP_BUMPENVMAP_LUMINANCE: 6193 /* Those are handled in the first pass of the shader(generation pass 1 and 2) already */ 6194 break; 6195 6196 default: 6197 FIXME("Unhandled texture op %08x\n", op); 6198 } 6199 6200 if (mul == 2) 6201 shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", dstreg, dstmask, dstreg); 6202 else if (mul == 4) 6203 shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", dstreg, dstmask, dstreg); 6204 } 6205 6206 static const char *arbfp_texture_target(enum wined3d_gl_resource_type type) 6207 { 6208 switch(type) 6209 { 6210 case WINED3D_GL_RES_TYPE_TEX_1D: 6211 return "1D"; 6212 case WINED3D_GL_RES_TYPE_TEX_2D: 6213 return "2D"; 6214 case WINED3D_GL_RES_TYPE_TEX_3D: 6215 return "3D"; 6216 case WINED3D_GL_RES_TYPE_TEX_CUBE: 6217 return "CUBE"; 6218 case WINED3D_GL_RES_TYPE_TEX_RECT: 6219 return "RECT"; 6220 default: 6221 return "unexpected_resource_type"; 6222 } 6223 } 6224 6225 static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, const struct wined3d_gl_info *gl_info) 6226 { 6227 BYTE tex_read = 0, bump_used = 0, luminance_used = 0, constant_used = 0; 6228 BOOL tempreg_used = FALSE, tfactor_used = FALSE; 6229 unsigned int stage, lowest_disabled_stage; 6230 struct wined3d_string_buffer buffer; 6231 struct color_fixup_masks masks; 6232 BOOL custom_linear_fog = FALSE; 6233 const char *textype, *instr; 6234 DWORD arg0, arg1, arg2; 6235 char colorcor_dst[8]; 6236 BOOL op_equal; 6237 GLuint ret; 6238 6239 if (!string_buffer_init(&buffer)) 6240 { 6241 ERR("Failed to initialize shader buffer.\n"); 6242 return 0; 6243 } 6244 6245 shader_addline(&buffer, "!!ARBfp1.0\n"); 6246 6247 if (settings->color_key_enabled) 6248 { 6249 shader_addline(&buffer, "PARAM color_key_low = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_LOW); 6250 shader_addline(&buffer, "PARAM color_key_high = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_HIGH); 6251 tex_read |= 1; 6252 } 6253 6254 /* Find out which textures are read */ 6255 for (stage = 0; 
stage < MAX_TEXTURES; ++stage) 6256 { 6257 if (settings->op[stage].cop == WINED3D_TOP_DISABLE) 6258 break; 6259 6260 arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK; 6261 arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK; 6262 arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK; 6263 6264 if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE) 6265 tex_read |= 1u << stage; 6266 if (settings->op[stage].tmp_dst) 6267 tempreg_used = TRUE; 6268 if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) 6269 tempreg_used = TRUE; 6270 if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) 6271 tfactor_used = TRUE; 6272 if (arg0 == WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT) 6273 constant_used |= 1u << stage; 6274 6275 switch (settings->op[stage].cop) 6276 { 6277 case WINED3D_TOP_BUMPENVMAP_LUMINANCE: 6278 luminance_used |= 1u << stage; 6279 /* fall through */ 6280 case WINED3D_TOP_BUMPENVMAP: 6281 bump_used |= 1u << stage; 6282 /* fall through */ 6283 case WINED3D_TOP_BLEND_TEXTURE_ALPHA: 6284 case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM: 6285 tex_read |= 1u << stage; 6286 break; 6287 6288 case WINED3D_TOP_BLEND_FACTOR_ALPHA: 6289 tfactor_used = TRUE; 6290 break; 6291 6292 default: 6293 break; 6294 } 6295 6296 if (settings->op[stage].aop == WINED3D_TOP_DISABLE) 6297 continue; 6298 6299 arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK; 6300 arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK; 6301 arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK; 6302 6303 if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE) 6304 tex_read |= 1u << stage; 6305 if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) 6306 tempreg_used = TRUE; 6307 if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) 6308 tfactor_used = TRUE; 6309 if (arg0 == 
WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT) 6310 constant_used |= 1u << stage; 6311 } 6312 lowest_disabled_stage = stage; 6313 6314 switch (settings->fog) 6315 { 6316 case WINED3D_FFP_PS_FOG_OFF: break; 6317 case WINED3D_FFP_PS_FOG_LINEAR: 6318 if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG) 6319 { 6320 custom_linear_fog = TRUE; 6321 break; 6322 } 6323 shader_addline(&buffer, "OPTION ARB_fog_linear;\n"); 6324 break; 6325 6326 case WINED3D_FFP_PS_FOG_EXP: shader_addline(&buffer, "OPTION ARB_fog_exp;\n"); break; 6327 case WINED3D_FFP_PS_FOG_EXP2: shader_addline(&buffer, "OPTION ARB_fog_exp2;\n"); break; 6328 default: FIXME("Unexpected fog setting %d\n", settings->fog); 6329 } 6330 6331 shader_addline(&buffer, "PARAM const = {1, 2, 4, 0.5};\n"); 6332 shader_addline(&buffer, "TEMP TMP;\n"); 6333 shader_addline(&buffer, "TEMP ret;\n"); 6334 if (tempreg_used || settings->sRGB_write) shader_addline(&buffer, "TEMP tempreg;\n"); 6335 shader_addline(&buffer, "TEMP arg0;\n"); 6336 shader_addline(&buffer, "TEMP arg1;\n"); 6337 shader_addline(&buffer, "TEMP arg2;\n"); 6338 for (stage = 0; stage < MAX_TEXTURES; ++stage) 6339 { 6340 if (constant_used & (1u << stage)) 6341 shader_addline(&buffer, "PARAM const%u = program.env[%u];\n", stage, ARB_FFP_CONST_CONSTANT(stage)); 6342 6343 if (!(tex_read & (1u << stage))) 6344 continue; 6345 6346 shader_addline(&buffer, "TEMP tex%u;\n", stage); 6347 6348 if (!(bump_used & (1u << stage))) 6349 continue; 6350 shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage)); 6351 6352 if (!(luminance_used & (1u << stage))) 6353 continue; 6354 shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage)); 6355 } 6356 if (tfactor_used) 6357 shader_addline(&buffer, "PARAM tfactor = program.env[%u];\n", ARB_FFP_CONST_TFACTOR); 6358 shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", 
ARB_FFP_CONST_SPECULAR_ENABLE); 6359 6360 if (settings->sRGB_write) 6361 { 6362 shader_addline(&buffer, "PARAM srgb_consts0 = "); 6363 shader_arb_append_imm_vec4(&buffer, wined3d_srgb_const0); 6364 shader_addline(&buffer, ";\n"); 6365 shader_addline(&buffer, "PARAM srgb_consts1 = "); 6366 shader_arb_append_imm_vec4(&buffer, wined3d_srgb_const1); 6367 shader_addline(&buffer, ";\n"); 6368 } 6369 6370 if (lowest_disabled_stage < 7 && settings->emul_clipplanes) 6371 shader_addline(&buffer, "KIL fragment.texcoord[7];\n"); 6372 6373 if (tempreg_used || settings->sRGB_write) 6374 shader_addline(&buffer, "MOV tempreg, 0.0;\n"); 6375 6376 /* Generate texture sampling instructions */ 6377 for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) 6378 { 6379 if (!(tex_read & (1u << stage))) 6380 continue; 6381 6382 textype = arbfp_texture_target(settings->op[stage].tex_type); 6383 6384 if (settings->op[stage].projected == WINED3D_PROJECTION_NONE) 6385 { 6386 instr = "TEX"; 6387 } 6388 else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT4 6389 || settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) 6390 { 6391 instr = "TXP"; 6392 } 6393 else 6394 { 6395 FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); 6396 instr = "TXP"; 6397 } 6398 6399 if (stage > 0 6400 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP 6401 || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) 6402 { 6403 shader_addline(&buffer, "SWZ arg1, bumpmat%u, x, z, 0, 0;\n", stage - 1); 6404 shader_addline(&buffer, "DP3 ret.x, arg1, tex%u;\n", stage - 1); 6405 shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1); 6406 shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1); 6407 6408 /* With projective textures, texbem only divides the static 6409 * texture coordinate, not the displacement, so multiply the 6410 * displacement with the dividing parameter before passing it to 6411 * 
TXP. */ 6412 if (settings->op[stage].projected != WINED3D_PROJECTION_NONE) 6413 { 6414 if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT4) 6415 { 6416 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage); 6417 shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", 6418 stage, stage); 6419 } 6420 else 6421 { 6422 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage); 6423 shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", 6424 stage, stage); 6425 } 6426 } 6427 else 6428 { 6429 shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage); 6430 } 6431 6432 shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n", 6433 instr, stage, stage, textype); 6434 if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) 6435 { 6436 shader_addline(&buffer, "MAD_SAT ret.x, tex%u.z, luminance%u.x, luminance%u.y;\n", 6437 stage - 1, stage - 1, stage - 1); 6438 shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage); 6439 } 6440 } 6441 else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) 6442 { 6443 shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage); 6444 shader_addline(&buffer, "MOV ret.w, ret.z;\n"); 6445 shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n", 6446 instr, stage, stage, textype); 6447 } 6448 else 6449 { 6450 shader_addline(&buffer, "%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n", 6451 instr, stage, stage, stage, textype); 6452 } 6453 6454 sprintf(colorcor_dst, "tex%u", stage); 6455 masks = calc_color_correction(settings->op[stage].color_fixup, WINED3DSP_WRITEMASK_ALL); 6456 gen_color_correction(&buffer, colorcor_dst, colorcor_dst, "const.x", "const.y", 6457 settings->op[stage].color_fixup, masks); 6458 } 6459 6460 if (settings->color_key_enabled) 6461 { 6462 shader_addline(&buffer, "SLT TMP, tex0, color_key_low;\n"); /* below low key */ 6463 
shader_addline(&buffer, "SGE ret, tex0, color_key_high;\n"); /* above high key */ 6464 shader_addline(&buffer, "ADD TMP, TMP, ret;\n"); /* or */ 6465 shader_addline(&buffer, "DP4 TMP.b, TMP, TMP;\n"); /* on any channel */ 6466 shader_addline(&buffer, "SGE TMP, -TMP.b, 0.0;\n"); /* logical not */ 6467 shader_addline(&buffer, "KIL -TMP;\n"); /* discard if true */ 6468 } 6469 6470 shader_addline(&buffer, "MOV ret, fragment.color.primary;\n"); 6471 6472 /* Generate the main shader */ 6473 for (stage = 0; stage < MAX_TEXTURES; ++stage) 6474 { 6475 if (settings->op[stage].cop == WINED3D_TOP_DISABLE) 6476 break; 6477 6478 if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1 6479 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1) 6480 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1; 6481 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1 6482 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2) 6483 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2; 6484 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2 6485 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1) 6486 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1; 6487 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2 6488 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2) 6489 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2; 6490 else 6491 op_equal = settings->op[stage].aop == settings->op[stage].cop 6492 && settings->op[stage].carg0 == settings->op[stage].aarg0 6493 && settings->op[stage].carg1 == settings->op[stage].aarg1 6494 && settings->op[stage].carg2 == settings->op[stage].aarg2; 6495 6496 if (settings->op[stage].aop == WINED3D_TOP_DISABLE) 6497 { 6498 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].tmp_dst, 6499 settings->op[stage].cop, settings->op[stage].carg0, 6500 settings->op[stage].carg1, settings->op[stage].carg2); 6501 } 6502 else if (op_equal) 6503 { 6504 gen_ffp_instr(&buffer, 
stage, TRUE, TRUE, settings->op[stage].tmp_dst, 6505 settings->op[stage].cop, settings->op[stage].carg0, 6506 settings->op[stage].carg1, settings->op[stage].carg2); 6507 } 6508 else if (settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP 6509 && settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP_LUMINANCE) 6510 { 6511 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].tmp_dst, 6512 settings->op[stage].cop, settings->op[stage].carg0, 6513 settings->op[stage].carg1, settings->op[stage].carg2); 6514 gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].tmp_dst, 6515 settings->op[stage].aop, settings->op[stage].aarg0, 6516 settings->op[stage].aarg1, settings->op[stage].aarg2); 6517 } 6518 } 6519 6520 if (settings->sRGB_write || custom_linear_fog) 6521 { 6522 shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, ret;\n"); 6523 if (settings->sRGB_write) 6524 arbfp_add_sRGB_correction(&buffer, "ret", "arg0", "arg1", "arg2", "tempreg", FALSE); 6525 if (custom_linear_fog) 6526 arbfp_add_linear_fog(&buffer, "ret", "arg0"); 6527 shader_addline(&buffer, "MOV result.color, ret;\n"); 6528 } 6529 else 6530 { 6531 shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, ret;\n"); 6532 } 6533 6534 /* Footer */ 6535 shader_addline(&buffer, "END\n"); 6536 6537 /* Generate the shader */ 6538 GL_EXTCALL(glGenProgramsARB(1, &ret)); 6539 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ret)); 6540 shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer); 6541 6542 string_buffer_free(&buffer); 6543 return ret; 6544 } 6545 6546 static void fragment_prog_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 6547 { 6548 const struct wined3d_gl_info *gl_info = context->gl_info; 6549 const struct wined3d_device *device = context->device; 6550 struct shader_arb_priv *priv = device->fragment_priv; 6551 BOOL use_pshader = use_ps(state); 6552 struct ffp_frag_settings settings; 
6553 const struct arbfp_ffp_desc *desc; 6554 unsigned int i; 6555 6556 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 6557 6558 if (isStateDirty(context, STATE_RENDER(WINED3D_RS_FOGENABLE))) 6559 { 6560 if (!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) 6561 { 6562 /* Reload fixed function constants since they collide with the 6563 * pixel shader constants. */ 6564 for (i = 0; i < MAX_TEXTURES; ++i) 6565 { 6566 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00)); 6567 state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT)); 6568 } 6569 state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR)); 6570 state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE)); 6571 color_key_arbfp(context, state, STATE_COLOR_KEY); 6572 } 6573 else if (use_pshader) 6574 { 6575 context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL; 6576 } 6577 return; 6578 } 6579 6580 if (!use_pshader) 6581 { 6582 /* Find or create a shader implementing the fixed function pipeline 6583 * settings, then activate it. */ 6584 gen_ffp_frag_op(context, state, &settings, FALSE); 6585 desc = (const struct arbfp_ffp_desc *)find_ffp_frag_shader(&priv->fragment_shaders, &settings); 6586 if (!desc) 6587 { 6588 struct arbfp_ffp_desc *new_desc; 6589 6590 if (!(new_desc = heap_alloc(sizeof(*new_desc)))) 6591 { 6592 ERR("Out of memory\n"); 6593 return; 6594 } 6595 6596 new_desc->parent.settings = settings; 6597 new_desc->shader = gen_arbfp_ffp_shader(&settings, gl_info); 6598 add_ffp_frag_shader(&priv->fragment_shaders, &new_desc->parent); 6599 TRACE("Allocated fixed function replacement shader descriptor %p\n", new_desc); 6600 desc = new_desc; 6601 } 6602 6603 /* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active (however, note the 6604 * comment above the shader_select call below). If e.g. 
GLSL is active, the shader_select call will 6605 * deactivate it. 6606 */ 6607 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)); 6608 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)"); 6609 priv->current_fprogram_id = desc->shader; 6610 6611 if (device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) 6612 { 6613 /* Reload fixed function constants since they collide with the 6614 * pixel shader constants. */ 6615 for (i = 0; i < MAX_TEXTURES; ++i) 6616 { 6617 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00)); 6618 state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT)); 6619 } 6620 state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR)); 6621 state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE)); 6622 color_key_arbfp(context, state, STATE_COLOR_KEY); 6623 } 6624 context->last_was_pshader = FALSE; 6625 } 6626 else if (!context->last_was_pshader) 6627 { 6628 if (device->shader_backend == &arb_program_shader_backend) 6629 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F; 6630 context->last_was_pshader = TRUE; 6631 } 6632 6633 context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL; 6634 } 6635 6636 /* We can't link the fog states to the fragment state directly since the 6637 * vertex pipeline links them to FOGENABLE. A different linking in different 6638 * pipeline parts can't be expressed in the combined state table, so we need 6639 * to handle that with a forwarding function. The other invisible side effect 6640 * is that changing the fog start and fog end (which links to FOGENABLE in 6641 * vertex) results in the fragment_prog_arbfp function being called because 6642 * FOGENABLE is dirty, which calls this function here. 
*/ 6643 static void state_arbfp_fog(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 6644 { 6645 enum fogsource new_source; 6646 DWORD fogstart = state->render_states[WINED3D_RS_FOGSTART]; 6647 DWORD fogend = state->render_states[WINED3D_RS_FOGEND]; 6648 6649 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 6650 6651 if (!isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL))) 6652 fragment_prog_arbfp(context, state, state_id); 6653 6654 if (!state->render_states[WINED3D_RS_FOGENABLE]) 6655 return; 6656 6657 if (state->render_states[WINED3D_RS_FOGTABLEMODE] == WINED3D_FOG_NONE) 6658 { 6659 if (use_vs(state)) 6660 { 6661 new_source = FOGSOURCE_VS; 6662 } 6663 else 6664 { 6665 if (state->render_states[WINED3D_RS_FOGVERTEXMODE] == WINED3D_FOG_NONE || context->last_was_rhw) 6666 new_source = FOGSOURCE_COORD; 6667 else 6668 new_source = FOGSOURCE_FFP; 6669 } 6670 } 6671 else 6672 { 6673 new_source = FOGSOURCE_FFP; 6674 } 6675 6676 if (new_source != context->fog_source || fogstart == fogend) 6677 { 6678 context->fog_source = new_source; 6679 state_fogstartend(context, state, STATE_RENDER(WINED3D_RS_FOGSTART)); 6680 } 6681 } 6682 6683 static void textransform(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 6684 { 6685 if (!isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL))) 6686 fragment_prog_arbfp(context, state, state_id); 6687 } 6688 6689 static const struct StateEntryTemplate arbfp_fragmentstate_template[] = 6690 { 6691 {STATE_RENDER(WINED3D_RS_TEXTUREFACTOR), { STATE_RENDER(WINED3D_RS_TEXTUREFACTOR), state_texfactor_arbfp }, WINED3D_GL_EXT_NONE }, 6692 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6693 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6694 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG2), { 
STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6695 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6696 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6697 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6698 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6699 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6700 {STATE_TEXTURESTAGE(0, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6701 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6702 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6703 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6704 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6705 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6706 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6707 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6708 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6709 {STATE_TEXTURESTAGE(1, 
WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6710 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6711 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6712 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6713 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6714 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6715 {STATE_TEXTURESTAGE(1, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6716 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6717 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6718 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6719 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6720 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6721 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6722 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6723 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6724 
{STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6725 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6726 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6727 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6728 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6729 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6730 {STATE_TEXTURESTAGE(2, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6731 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6732 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6733 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6734 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6735 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6736 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6737 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6738 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE 
}, 6739 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6740 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6741 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6742 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6743 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6744 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6745 {STATE_TEXTURESTAGE(3, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6746 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6747 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6748 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6749 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6750 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6751 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6752 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6753 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, 
WINED3D_GL_EXT_NONE }, 6754 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6755 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6756 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6757 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6758 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6759 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6760 {STATE_TEXTURESTAGE(4, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6761 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6762 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6763 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6764 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6765 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6766 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6767 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6768 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), 
NULL }, WINED3D_GL_EXT_NONE }, 6769 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6770 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6771 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6772 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6773 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6774 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6775 {STATE_TEXTURESTAGE(5, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6776 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6777 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6778 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6779 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6780 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6781 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6782 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6783 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG1), { 
STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6784 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6785 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6786 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6787 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6788 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6789 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6790 {STATE_TEXTURESTAGE(6, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6791 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6792 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6793 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6794 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6795 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6796 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6797 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6798 {STATE_TEXTURESTAGE(7, 
WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6799 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6800 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6801 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6802 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6803 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6804 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6805 {STATE_TEXTURESTAGE(7, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6806 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6807 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6808 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6809 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6810 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6811 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6812 {STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), fragment_prog_arbfp }, WINED3D_GL_EXT_NONE }, 6813 
{STATE_RENDER(WINED3D_RS_ALPHAFUNC), { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6814 {STATE_RENDER(WINED3D_RS_ALPHAREF), { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6815 {STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), alpha_test_arbfp }, WINED3D_GL_EXT_NONE }, 6816 {STATE_RENDER(WINED3D_RS_COLORKEYENABLE), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6817 {STATE_COLOR_KEY, { STATE_COLOR_KEY, color_key_arbfp }, WINED3D_GL_EXT_NONE }, 6818 {STATE_RENDER(WINED3D_RS_FOGENABLE), { STATE_RENDER(WINED3D_RS_FOGENABLE), state_arbfp_fog }, WINED3D_GL_EXT_NONE }, 6819 {STATE_RENDER(WINED3D_RS_FOGTABLEMODE), { STATE_RENDER(WINED3D_RS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6820 {STATE_RENDER(WINED3D_RS_FOGVERTEXMODE), { STATE_RENDER(WINED3D_RS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6821 {STATE_RENDER(WINED3D_RS_FOGSTART), { STATE_RENDER(WINED3D_RS_FOGSTART), state_fogstartend }, WINED3D_GL_EXT_NONE }, 6822 {STATE_RENDER(WINED3D_RS_FOGEND), { STATE_RENDER(WINED3D_RS_FOGSTART), NULL }, WINED3D_GL_EXT_NONE }, 6823 {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), { STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), state_srgbwrite }, ARB_FRAMEBUFFER_SRGB }, 6824 {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6825 {STATE_RENDER(WINED3D_RS_FOGCOLOR), { STATE_RENDER(WINED3D_RS_FOGCOLOR), state_fogcolor }, WINED3D_GL_EXT_NONE }, 6826 {STATE_RENDER(WINED3D_RS_FOGDENSITY), { STATE_RENDER(WINED3D_RS_FOGDENSITY), state_fogdensity }, WINED3D_GL_EXT_NONE }, 6827 {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6828 {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6829 
{STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6830 {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6831 {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6832 {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6833 {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6834 {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6835 {STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6836 {STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6837 {STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6838 {STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6839 {STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6840 {STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6841 {STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, 
/* This fragment pipeline keeps no per-context data: allocation is a no-op
 * that always reports success. */
static BOOL arbfp_alloc_context_data(struct wined3d_context *context)
{
    return TRUE;
}

/* Counterpart of arbfp_alloc_context_data(); there is nothing to release. */
static void arbfp_free_context_data(struct wined3d_context *context)
{
}

/* Fragment pipeline implementation backed by ARB fragment programs.
 * The initializer is positional, so the entries must stay in the member
 * order of struct fragment_pipeline (declared outside this file section). */
const struct fragment_pipeline arbfp_fragment_pipeline = {
    arbfp_enable,
    arbfp_get_caps,
    arbfp_get_emul_mask,
    arbfp_alloc,
    arbfp_free,
    arbfp_alloc_context_data,
    arbfp_free_context_data,
    shader_arb_color_fixup_supported,
    arbfp_fragmentstate_template,
};

/* Key describing one blit shader variant. The declared bit-field widths add
 * up to 32 bits. arbfp_blit_type_compare() compares keys with memcmp(), so
 * every bit takes part in the comparison, "padding" included.
 * NOTE(review): presumably callers zero the whole key before filling it in;
 * verify at the lookup/insert sites. */
struct arbfp_blit_type
{
    enum complex_fixup fixup : 4;                   /* Format conversion the shader performs. */
    enum wined3d_gl_resource_type res_type : 3;     /* Texture target the shader samples from. */
    DWORD use_color_key : 1;                        /* Non-zero if color keying is requested. */
    DWORD padding : 24;                             /* Unused bits, spelled out explicitly. */
};

/* One compiled blit shader, linked into a wine_rb tree through "entry". */
struct arbfp_blit_desc
{
    GLuint shader;                  /* ARB program name; deleted with glDeleteProgramsARB(). */
    struct arbfp_blit_type type;    /* Key this shader was built for. */
    struct wine_rb_entry entry;
};

/* Indices of the blit shader parameters. ARBFP_BLIT_PARAM_SIZE is used by
 * gen_yuv_shader() as the program.local[] index of the "size" parameter.
 * NOTE(review): the two color key values are presumably program.local[]
 * indices as well; their use is not visible in this part of the file. */
#define ARBFP_BLIT_PARAM_SIZE 0
#define ARBFP_BLIT_PARAM_COLOR_KEY_LOW 1
#define ARBFP_BLIT_PARAM_COLOR_KEY_HIGH 2

/* ARB fragment program based blitter. */
struct wined3d_arbfp_blitter
{
    /* Embedded generic blitter; arbfp_blitter_destroy() recovers the
     * containing structure from it with CONTAINING_RECORD(). */
    struct wined3d_blitter blitter;
    /* Tree of shaders; torn down entry by entry with arbfp_free_blit_shader(). */
    struct wine_rb_tree shaders;
    /* GL name of the 1D palette texture. upload_palette() generates it on
     * first use, treating 0 as "not generated yet". */
    GLuint palette_texture;
};

/* wine_rb comparison callback: "key" points to a struct arbfp_blit_type and
 * is compared bytewise against the key stored in the struct arbfp_blit_desc
 * that contains "entry". Returns the memcmp() result. */
static int arbfp_blit_type_compare(const void *key, const struct wine_rb_entry *entry)
{
    const struct arbfp_blit_type *ka = key;
    const struct arbfp_blit_type *kb = &WINE_RB_ENTRY_VALUE(entry, const struct arbfp_blit_desc, entry)->type;

    return memcmp(ka, kb, sizeof(*ka));
}
*/ 6904 static void arbfp_free_blit_shader(struct wine_rb_entry *entry, void *ctx) 6905 { 6906 struct arbfp_blit_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry); 6907 const struct wined3d_gl_info *gl_info; 6908 struct wined3d_context *context; 6909 6910 context = ctx; 6911 gl_info = context->gl_info; 6912 6913 GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader)); 6914 checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)"); 6915 heap_free(entry_arb); 6916 } 6917 6918 /* Context activation is done by the caller. */ 6919 static void arbfp_blitter_destroy(struct wined3d_blitter *blitter, struct wined3d_context *context) 6920 { 6921 const struct wined3d_gl_info *gl_info = context->gl_info; 6922 struct wined3d_arbfp_blitter *arbfp_blitter; 6923 struct wined3d_blitter *next; 6924 6925 if ((next = blitter->next)) 6926 next->ops->blitter_destroy(next, context); 6927 6928 arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter); 6929 6930 wine_rb_destroy(&arbfp_blitter->shaders, arbfp_free_blit_shader, context); 6931 checkGLcall("Delete blit programs"); 6932 6933 if (arbfp_blitter->palette_texture) 6934 gl_info->gl_ops.gl.p_glDeleteTextures(1, &arbfp_blitter->palette_texture); 6935 6936 heap_free(arbfp_blitter); 6937 } 6938 6939 static void gen_packed_yuv_read(struct wined3d_string_buffer *buffer, 6940 const struct arbfp_blit_type *type, char *luminance) 6941 { 6942 char chroma; 6943 const char *tex, *texinstr = "TXP"; 6944 6945 if (type->fixup == COMPLEX_FIXUP_UYVY) 6946 { 6947 chroma = 'x'; 6948 *luminance = 'w'; 6949 } 6950 else 6951 { 6952 chroma = 'w'; 6953 *luminance = 'x'; 6954 } 6955 6956 tex = arbfp_texture_target(type->res_type); 6957 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_RECT) 6958 texinstr = "TEX"; 6959 6960 /* First we have to read the chroma values. This means we need at least two pixels(no filtering), 6961 * or 4 pixels(with filtering). 
To get the unmodified chromas, we have to rid ourselves of the 6962 * filtering when we sample the texture. 6963 * 6964 * These are the rules for reading the chroma: 6965 * 6966 * Even pixel: Cr 6967 * Even pixel: U 6968 * Odd pixel: V 6969 * 6970 * So we have to get the sampling x position in non-normalized coordinates in integers 6971 */ 6972 if (type->res_type != WINED3D_GL_RES_TYPE_TEX_RECT) 6973 { 6974 shader_addline(buffer, "MUL texcrd.xy, fragment.texcoord[0], size.x;\n"); 6975 shader_addline(buffer, "MOV texcrd.w, size.x;\n"); 6976 } 6977 else 6978 { 6979 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 6980 } 6981 /* We must not allow filtering between pixel x and x+1, this would mix U and V 6982 * Vertical filtering is ok. However, bear in mind that the pixel center is at 6983 * 0.5, so add 0.5. 6984 */ 6985 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 6986 shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n"); 6987 6988 /* Multiply the x coordinate by 0.5 and get the fraction. This gives 0.25 6989 * and 0.75 for the even and odd pixels respectively. */ 6990 shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n"); 6991 shader_addline(buffer, "FRC texcrd2, texcrd2;\n"); 6992 6993 /* Sample Pixel 1. */ 6994 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 6995 6996 /* Put the value into either of the chroma values */ 6997 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 6998 shader_addline(buffer, "MUL chroma.x, luminance.%c, temp.x;\n", chroma); 6999 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 7000 shader_addline(buffer, "MUL chroma.y, luminance.%c, temp.x;\n", chroma); 7001 7002 /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample 7003 * the pixel right to the current one. Otherwise, sample the left pixel. 7004 * Bias and scale the SLT result to -1;1 and add it to the texcrd.x. 
7005 */ 7006 shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n"); 7007 shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n"); 7008 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 7009 7010 /* Put the value into the other chroma */ 7011 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 7012 shader_addline(buffer, "MAD chroma.y, luminance.%c, temp.x, chroma.y;\n", chroma); 7013 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 7014 shader_addline(buffer, "MAD chroma.x, luminance.%c, temp.x, chroma.x;\n", chroma); 7015 7016 /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of 7017 * the current one and lerp the two U and V values 7018 */ 7019 7020 /* This gives the correctly filtered luminance value */ 7021 shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex); 7022 } 7023 7024 static void gen_yv12_read(struct wined3d_string_buffer *buffer, 7025 const struct arbfp_blit_type *type, char *luminance) 7026 { 7027 const char *tex; 7028 static const float yv12_coef[] 7029 = {2.0f / 3.0f, 1.0f / 6.0f, (2.0f / 3.0f) + (1.0f / 6.0f), 1.0f / 3.0f}; 7030 7031 tex = arbfp_texture_target(type->res_type); 7032 7033 /* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2) 7034 * V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective 7035 * bitdepth is 12 bits per pixel. Since the U and V planes have only half the 7036 * pitch of the luminance plane, the packing into the gl texture is a bit 7037 * unfortunate. 
If the whole texture is interpreted as luminance data it looks 7038 * approximately like this: 7039 * 7040 * +----------------------------------+---- 7041 * | | 7042 * | | 7043 * | | 7044 * | | 7045 * | | 2 7046 * | LUMINANCE | - 7047 * | | 3 7048 * | | 7049 * | | 7050 * | | 7051 * | | 7052 * +----------------+-----------------+---- 7053 * | | | 7054 * | V even rows | V odd rows | 7055 * | | | 1 7056 * +----------------+------------------ - 7057 * | | | 3 7058 * | U even rows | U odd rows | 7059 * | | | 7060 * +----------------+-----------------+---- 7061 * | | | 7062 * | 0.5 | 0.5 | 7063 * 7064 * So it appears as if there are 4 chroma images, but in fact the odd rows 7065 * in the chroma images are in the same row as the even ones. So it is 7066 * kinda tricky to read 7067 * 7068 * When reading from rectangle textures, keep in mind that the input y coordinates 7069 * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height 7070 */ 7071 shader_addline(buffer, "PARAM yv12_coef = "); 7072 shader_arb_append_imm_vec4(buffer, yv12_coef); 7073 shader_addline(buffer, ";\n"); 7074 7075 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7076 /* the chroma planes have only half the width */ 7077 shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n"); 7078 7079 /* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias 7080 * the coordinate. 
Also read the right side of the image when reading odd lines 7081 * 7082 * Don't forget to clamp the y values in into the range, otherwise we'll get filtering 7083 * bleeding 7084 */ 7085 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7086 { 7087 shader_addline(buffer, "RCP chroma.w, size.y;\n"); 7088 7089 shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n"); 7090 7091 shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n"); 7092 shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n"); 7093 7094 /* Read odd lines from the right side (add size * 0.5 to the x coordinate). */ 7095 shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 7096 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 7097 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 7098 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 7099 7100 /* clamp, keep the half pixel origin in mind */ 7101 shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, yv12_coef.x;\n"); 7102 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7103 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.z;\n"); 7104 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7105 } 7106 else 7107 { 7108 /* The y coordinate for V is in the range [size, size + size / 4). */ 7109 shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n"); 7110 shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n"); 7111 7112 /* Read odd lines from the right side (add size * 0.5 to the x coordinate). 
*/ 7113 shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 7114 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 7115 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 7116 shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n"); 7117 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 7118 7119 /* Make sure to read exactly from the pixel center */ 7120 shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n"); 7121 shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n"); 7122 7123 /* Clamp */ 7124 shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n"); 7125 shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n"); 7126 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7127 shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n"); 7128 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7129 } 7130 /* Read the texture, put the result into the output register */ 7131 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7132 shader_addline(buffer, "MOV chroma.x, temp.w;\n"); 7133 7134 /* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th 7135 * No need to clamp because we're just reusing the already clamped value from above 7136 */ 7137 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7138 shader_addline(buffer, "ADD texcrd.y, texcrd.y, yv12_coef.y;\n"); 7139 else 7140 shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, texcrd.y;\n"); 7141 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7142 shader_addline(buffer, "MOV chroma.y, temp.w;\n"); 7143 7144 /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate. 
7145 * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance 7146 * values due to filtering 7147 */ 7148 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7149 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7150 { 7151 /* Multiply the y coordinate by 2/3 and clamp it */ 7152 shader_addline(buffer, "MUL texcrd.y, texcrd.y, yv12_coef.x;\n"); 7153 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.x;\n"); 7154 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7155 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7156 } 7157 else 7158 { 7159 /* Reading from texture_rectangles is pretty straightforward, just use the unmodified 7160 * texture coordinate. It is still a good idea to clamp it though, since the opengl texture 7161 * is bigger 7162 */ 7163 shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n"); 7164 shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n"); 7165 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7166 } 7167 *luminance = 'a'; 7168 } 7169 7170 static void gen_nv12_read(struct wined3d_string_buffer *buffer, 7171 const struct arbfp_blit_type *type, char *luminance) 7172 { 7173 const char *tex; 7174 static const float nv12_coef[] 7175 = {2.0f / 3.0f, 1.0f / 3.0f, 1.0f, 1.0f}; 7176 7177 tex = arbfp_texture_target(type->res_type); 7178 7179 /* NV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2) 7180 * sized plane where each component is an UV pair. 
So the effective 7181 * bitdepth is 12 bits per pixel If the whole texture is interpreted as luminance 7182 * data it looks approximately like this: 7183 * 7184 * +----------------------------------+---- 7185 * | | 7186 * | | 7187 * | | 7188 * | | 7189 * | | 2 7190 * | LUMINANCE | - 7191 * | | 3 7192 * | | 7193 * | | 7194 * | | 7195 * | | 7196 * +----------------------------------+---- 7197 * |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV| 7198 * |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV| 7199 * | | 1 7200 * | | - 7201 * | | 3 7202 * | | 7203 * | | 7204 * +----------------------------------+---- 7205 * 7206 * When reading from rectangle textures, keep in mind that the input y coordinates 7207 * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height. */ 7208 7209 shader_addline(buffer, "PARAM nv12_coef = "); 7210 shader_arb_append_imm_vec4(buffer, nv12_coef); 7211 shader_addline(buffer, ";\n"); 7212 7213 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7214 /* We only have half the number of chroma pixels. */ 7215 shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n"); 7216 7217 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7218 { 7219 shader_addline(buffer, "RCP chroma.w, size.x;\n"); 7220 shader_addline(buffer, "RCP chroma.z, size.y;\n"); 7221 7222 shader_addline(buffer, "MAD texcrd.y, texcrd.y, nv12_coef.y, nv12_coef.x;\n"); 7223 7224 /* We must not allow filtering horizontally, this would mix U and V. 7225 * Vertical filtering is ok. However, bear in mind that the pixel center is at 7226 * 0.5, so add 0.5. */ 7227 7228 /* Convert to non-normalized coordinates so we can find the 7229 * individual pixel. */ 7230 shader_addline(buffer, "MUL texcrd.x, texcrd.x, size.x;\n"); 7231 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 7232 /* Multiply by 2 since chroma components are stored in UV pixel pairs, 7233 * add 0.5 to hit the center of the pixel. 
*/ 7234 shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n"); 7235 7236 /* Convert back to normalized coordinates. */ 7237 shader_addline(buffer, "MUL texcrd.x, texcrd.x, chroma.w;\n"); 7238 7239 /* Clamp, keep the half pixel origin in mind. */ 7240 shader_addline(buffer, "MAD temp.y, coef.y, chroma.z, nv12_coef.x;\n"); 7241 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7242 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.z, nv12_coef.z;\n"); 7243 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7244 } 7245 else 7246 { 7247 /* The y coordinate for chroma is in the range [size, size + size / 2). */ 7248 shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.y, size.y;\n"); 7249 7250 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 7251 /* Multiply by 2 since chroma components are stored in UV pixel pairs, 7252 * add 0.5 to hit the center of the pixel. */ 7253 shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n"); 7254 7255 /* Clamp */ 7256 shader_addline(buffer, "MAD temp.y, size.y, coef.y, size.y;\n"); 7257 shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n"); 7258 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7259 shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n"); 7260 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7261 } 7262 /* Read the texture, put the result into the output register. */ 7263 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7264 shader_addline(buffer, "MOV chroma.y, temp.w;\n"); 7265 7266 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7267 { 7268 /* Add 1/size.x */ 7269 shader_addline(buffer, "ADD texcrd.x, texcrd.x, chroma.w;\n"); 7270 } 7271 else 7272 { 7273 /* Add 1 */ 7274 shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.x;\n"); 7275 } 7276 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7277 shader_addline(buffer, "MOV chroma.x, temp.w;\n"); 7278 7279 /* Sample the luminance value. 
It is in the top 2/3rd of the texture, so scale the y coordinate. 7280 * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance 7281 * values due to filtering. */ 7282 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7283 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7284 { 7285 /* Multiply the y coordinate by 2/3 and clamp it */ 7286 shader_addline(buffer, "MUL texcrd.y, texcrd.y, nv12_coef.x;\n"); 7287 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, nv12_coef.x;\n"); 7288 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7289 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7290 } 7291 else 7292 { 7293 /* Reading from texture_rectangles is pretty straightforward, just use the unmodified 7294 * texture coordinate. It is still a good idea to clamp it though, since the opengl texture 7295 * is bigger 7296 */ 7297 shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n"); 7298 shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n"); 7299 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7300 } 7301 *luminance = 'a'; 7302 } 7303 7304 /* Context activation is done by the caller. */ 7305 static GLuint gen_p8_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type) 7306 { 7307 GLuint shader; 7308 struct wined3d_string_buffer buffer; 7309 const char *tex_target = arbfp_texture_target(type->res_type); 7310 7311 /* This should not happen because we only use this conversion for 7312 * present blits which don't use color keying. 
*/ 7313 if (type->use_color_key) 7314 FIXME("Implement P8 color keying.\n"); 7315 7316 /* Shader header */ 7317 if (!string_buffer_init(&buffer)) 7318 { 7319 ERR("Failed to initialize shader buffer.\n"); 7320 return 0; 7321 } 7322 7323 GL_EXTCALL(glGenProgramsARB(1, &shader)); 7324 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 7325 if (!shader) 7326 { 7327 string_buffer_free(&buffer); 7328 return 0; 7329 } 7330 7331 shader_addline(&buffer, "!!ARBfp1.0\n"); 7332 shader_addline(&buffer, "TEMP index;\n"); 7333 7334 /* { 255/256, 0.5/255*255/256, 0, 0 } */ 7335 shader_addline(&buffer, "PARAM constants = { 0.996, 0.00195, 0, 0 };\n"); 7336 7337 /* The alpha-component contains the palette index */ 7338 shader_addline(&buffer, "TEX index, fragment.texcoord[0], texture[0], %s;\n", tex_target); 7339 7340 /* Scale the index by 255/256 and add a bias of '0.5' in order to sample in the middle */ 7341 shader_addline(&buffer, "MAD index.a, index.a, constants.x, constants.y;\n"); 7342 7343 /* Use the alpha-component as an index in the palette to get the final color */ 7344 shader_addline(&buffer, "TEX result.color, index.a, texture[1], 1D;\n"); 7345 shader_addline(&buffer, "END\n"); 7346 7347 shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer); 7348 7349 string_buffer_free(&buffer); 7350 7351 return shader; 7352 } 7353 7354 /* Context activation is done by the caller. */ 7355 static void upload_palette(struct wined3d_arbfp_blitter *blitter, 7356 const struct wined3d_texture *texture, struct wined3d_context *context) 7357 { 7358 const struct wined3d_palette *palette = texture->swapchain ? 
texture->swapchain->palette : NULL; 7359 const struct wined3d_gl_info *gl_info = context->gl_info; 7360 7361 if (!blitter->palette_texture) 7362 gl_info->gl_ops.gl.p_glGenTextures(1, &blitter->palette_texture); 7363 7364 GL_EXTCALL(glActiveTexture(GL_TEXTURE1)); 7365 gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_1D, blitter->palette_texture); 7366 7367 gl_info->gl_ops.gl.p_glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); 7368 7369 gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 7370 /* Make sure we have discrete color levels. */ 7371 gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 7372 gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 7373 /* TODO: avoid unneeded uploads in the future by adding some SFLAG_PALETTE_DIRTY mechanism */ 7374 if (palette) 7375 { 7376 gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 256, 0, GL_BGRA, 7377 GL_UNSIGNED_INT_8_8_8_8_REV, palette->colors); 7378 } 7379 else 7380 { 7381 static const DWORD black; 7382 FIXME("P8 surface loaded without a palette.\n"); 7383 gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 1, 0, GL_BGRA, 7384 GL_UNSIGNED_INT_8_8_8_8_REV, &black); 7385 } 7386 7387 /* Switch back to unit 0 in which the 2D texture will be stored. */ 7388 context_active_texture(context, gl_info, 0); 7389 } 7390 7391 /* Context activation is done by the caller. 
*/
/* Build the ARB fragment program that converts a YUV source to RGB while
 * blitting. The read code is selected by type->fixup: packed YUY2 / UYVY,
 * planar YV12, or NV12.
 *
 * Returns the GL program name, or 0 if the string buffer could not be
 * initialised, no program name could be generated, or type->fixup is not one
 * of the layouts above. Colour keying is not implemented on this path; a
 * FIXME is logged when type->use_color_key is set. */
static GLuint gen_yuv_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
{
    GLuint shader = 0;
    struct wined3d_string_buffer buffer;
    char luminance_component;

    if (type->use_color_key)
        FIXME("Implement YUV color keying.\n");

    /* Shader header */
    if (!string_buffer_init(&buffer))
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

    GL_EXTCALL(glGenProgramsARB(1, &shader));
    checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");
    /* Check the name before binding it, like arbfp_gen_plain_shader() does. */
    if (!shader)
    {
        string_buffer_free(&buffer);
        return 0;
    }
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");

    /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel,
     * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and
     * two chroma(U and V) values. Each macropixel has two luminance values, one for
     * each single pixel it contains, and one U and one V value shared between both
     * pixels.
     *
     * The data is loaded into an A8L8 texture. With YUY2, the luminance component
     * contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus
     * take the format into account when generating the read swizzles
     *
     * Reading the Y value is straightforward - just sample the texture. The hardware
     * takes care of filtering in the horizontal and vertical direction.
     *
     * Reading the U and V values is harder. We have to avoid filtering horizontally,
     * because that would mix the U and V values of one pixel or two adjacent pixels.
     * Thus floor the texture coordinate and add 0.5 to get an unfiltered read,
     * regardless of the filtering setting. Vertical filtering works automatically
     * though - the U and V values of two rows are mixed nicely.
     *
     * Apart from avoiding filtering issues, the code has to know which value it just
     * read, and where it can find the other one. To determine this, it checks if
     * it sampled an even or odd pixel, and shifts the 2nd read accordingly.
     *
     * Handling horizontal filtering of U and V values requires reading a 2nd pair
     * of pixels, extracting U and V and mixing them. This is not implemented yet.
     *
     * An alternative implementation idea is to load the texture as A8R8G8B8 texture,
     * with width / 2. This way one read gives all 3 values, finding U and V is easy
     * in an unfiltered situation. Finding the luminance on the other hand requires
     * finding out if it is an odd or even pixel. The real drawback of this approach
     * is filtering. This would have to be emulated completely in the shader, reading
     * up to 2 packed pixels in up to 2 rows and interpolating both horizontally and
     * vertically. Beyond that it would require adjustments to the texture handling
     * code to deal with the width scaling
     */
    shader_addline(&buffer, "!!ARBfp1.0\n");
    shader_addline(&buffer, "TEMP luminance;\n");
    shader_addline(&buffer, "TEMP temp;\n");
    shader_addline(&buffer, "TEMP chroma;\n");
    shader_addline(&buffer, "TEMP texcrd;\n");
    shader_addline(&buffer, "TEMP texcrd2;\n");
    shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.25};\n");
    shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n");
    shader_addline(&buffer, "PARAM size = program.local[%u];\n", ARBFP_BLIT_PARAM_SIZE);

    /* The read helpers emit the sampling code and report, through
     * luminance_component, which component of "luminance" is used below. */
    switch (type->fixup)
    {
        case COMPLEX_FIXUP_UYVY:
        case COMPLEX_FIXUP_YUY2:
            gen_packed_yuv_read(&buffer, type, &luminance_component);
            break;

        case COMPLEX_FIXUP_YV12:
            gen_yv12_read(&buffer, type, &luminance_component);
            break;

        case COMPLEX_FIXUP_NV12:
            gen_nv12_read(&buffer, type, &luminance_component);
            break;

        default:
            FIXME("Unsupported YUV fixup %#x\n", type->fixup);
            /* The caller only sees 0 and can't release the program name
             * generated above, so delete it here instead of leaking it. */
            GL_EXTCALL(glDeleteProgramsARB(1, &shader));
            checkGLcall("glDeleteProgramsARB(1, &shader)");
            string_buffer_free(&buffer);
            return 0;
    }

    /* Calculate the final result. Formula is taken from
     * http://www.fourcc.org/fccyvrgb.php. Note that the chroma
     * ranges from -0.5 to 0.5
     */
    shader_addline(&buffer, "SUB chroma.xy, chroma, coef.y;\n");

    shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "MAD temp.x, -chroma.y, yuv_coef.y, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "MAD result.color.y, -chroma.x, yuv_coef.z, temp.x;\n");
    shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "END\n");

    shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);

    string_buffer_free(&buffer);

    return shader;
}

/* Context activation is done by the caller.
*/
/* Build the ARB fragment program used for blits that need no complex format
 * conversion: a single texture fetch, optionally preceded by a colour key
 * test that discards fragments whose colour lies inside the key range.
 *
 * Returns the GL program name, or 0 if the string buffer could not be
 * initialised or no program name could be generated. */
static GLuint arbfp_gen_plain_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
{
    const char *target_name = arbfp_texture_target(type->res_type);
    struct wined3d_string_buffer source;
    GLuint program;

    if (!string_buffer_init(&source))
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

    GL_EXTCALL(glGenProgramsARB(1, &program));
    if (!program)
    {
        string_buffer_free(&source);
        return 0;
    }
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, program));

    shader_addline(&source, "!!ARBfp1.0\n");

    if (!type->use_color_key)
    {
        /* No keying: the sampled texel is the result. */
        shader_addline(&source, "TEX result.color, fragment.texcoord[0], texture[0], %s;\n", target_name);
    }
    else
    {
        shader_addline(&source, "TEMP color;\n");
        shader_addline(&source, "TEMP less, greater;\n");
        shader_addline(&source, "PARAM color_key_low = program.local[%u];\n", ARBFP_BLIT_PARAM_COLOR_KEY_LOW);
        shader_addline(&source, "PARAM color_key_high = program.local[%u];\n", ARBFP_BLIT_PARAM_COLOR_KEY_HIGH);
        shader_addline(&source, "TEX color, fragment.texcoord[0], texture[0], %s;\n", target_name);
        /* Per channel: 1.0 where the texel is below the low key... */
        shader_addline(&source, "SLT less, color, color_key_low;\n");
        /* ...or at/above the high key. */
        shader_addline(&source, "SGE greater, color, color_key_high;\n");
        /* Combine both tests (logical or). */
        shader_addline(&source, "ADD less, less, greater;\n");
        /* Non-zero if any channel is outside the key range. */
        shader_addline(&source, "DP4 less.b, less, less;\n");
        /* Logical not: 1.0 only if every channel is inside the key range. */
        shader_addline(&source, "SGE less, -less.b, 0.0;\n");
        /* Discard the fragment when it matches the colour key. */
        shader_addline(&source, "KIL -less;\n");
        shader_addline(&source, "MOV result.color, color;\n");
    }

    shader_addline(&source, "END\n");

    shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, source.buffer);

    string_buffer_free(&source);

    return program;
}

/* Context
activation is done by the caller. */ 7557 static HRESULT arbfp_blit_set(struct wined3d_arbfp_blitter *blitter, struct wined3d_context *context, 7558 const struct wined3d_texture *texture, unsigned int sub_resource_idx, 7559 const struct wined3d_color_key *color_key) 7560 { 7561 enum complex_fixup fixup; 7562 const struct wined3d_gl_info *gl_info = context->gl_info; 7563 struct wine_rb_entry *entry; 7564 struct arbfp_blit_type type; 7565 struct arbfp_blit_desc *desc; 7566 struct wined3d_color float_color_key[2]; 7567 struct wined3d_vec4 size; 7568 unsigned int level; 7569 GLuint shader; 7570 7571 level = sub_resource_idx % texture->level_count; 7572 size.x = wined3d_texture_get_level_pow2_width(texture, level); 7573 size.y = wined3d_texture_get_level_pow2_height(texture, level); 7574 size.z = 1.0f; 7575 size.w = 1.0f; 7576 7577 if (is_complex_fixup(texture->resource.format->color_fixup)) 7578 fixup = get_complex_fixup(texture->resource.format->color_fixup); 7579 else 7580 fixup = COMPLEX_FIXUP_NONE; 7581 7582 switch (texture->target) 7583 { 7584 case GL_TEXTURE_1D: 7585 type.res_type = WINED3D_GL_RES_TYPE_TEX_1D; 7586 break; 7587 7588 case GL_TEXTURE_2D: 7589 type.res_type = WINED3D_GL_RES_TYPE_TEX_2D; 7590 break; 7591 7592 case GL_TEXTURE_3D: 7593 type.res_type = WINED3D_GL_RES_TYPE_TEX_3D; 7594 break; 7595 7596 case GL_TEXTURE_CUBE_MAP_ARB: 7597 type.res_type = WINED3D_GL_RES_TYPE_TEX_CUBE; 7598 break; 7599 7600 case GL_TEXTURE_RECTANGLE_ARB: 7601 type.res_type = WINED3D_GL_RES_TYPE_TEX_RECT; 7602 break; 7603 7604 default: 7605 ERR("Unexpected GL texture type %#x.\n", texture->target); 7606 type.res_type = WINED3D_GL_RES_TYPE_TEX_2D; 7607 } 7608 type.fixup = fixup; 7609 type.use_color_key = !!color_key; 7610 type.padding = 0; 7611 7612 if ((entry = wine_rb_get(&blitter->shaders, &type))) 7613 { 7614 desc = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry); 7615 shader = desc->shader; 7616 } 7617 else 7618 { 7619 switch (fixup) 7620 { 7621 case 
COMPLEX_FIXUP_NONE: 7622 if (!is_identity_fixup(texture->resource.format->color_fixup)) 7623 FIXME("Implement support for sign or swizzle fixups.\n"); 7624 shader = arbfp_gen_plain_shader(gl_info, &type); 7625 break; 7626 7627 case COMPLEX_FIXUP_P8: 7628 shader = gen_p8_shader(gl_info, &type); 7629 break; 7630 7631 case COMPLEX_FIXUP_YUY2: 7632 case COMPLEX_FIXUP_UYVY: 7633 case COMPLEX_FIXUP_YV12: 7634 case COMPLEX_FIXUP_NV12: 7635 shader = gen_yuv_shader(gl_info, &type); 7636 break; 7637 } 7638 7639 if (!shader) 7640 { 7641 FIXME("Unsupported complex fixup %#x, not setting a shader\n", fixup); 7642 return E_NOTIMPL; 7643 } 7644 7645 if (!(desc = heap_alloc(sizeof(*desc)))) 7646 goto err_out; 7647 7648 desc->type = type; 7649 desc->shader = shader; 7650 if (wine_rb_put(&blitter->shaders, &desc->type, &desc->entry) == -1) 7651 { 7652 err_out: 7653 ERR("Out of memory\n"); 7654 GL_EXTCALL(glDeleteProgramsARB(1, &shader)); 7655 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader))"); 7656 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)); 7657 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)"); 7658 heap_free(desc); 7659 return E_OUTOFMEMORY; 7660 } 7661 } 7662 7663 if (fixup == COMPLEX_FIXUP_P8) 7664 upload_palette(blitter, texture, context); 7665 7666 gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB); 7667 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 7668 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 7669 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)"); 7670 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARBFP_BLIT_PARAM_SIZE, &size.x)); 7671 checkGLcall("glProgramLocalParameter4fvARB"); 7672 if (type.use_color_key) 7673 { 7674 wined3d_format_get_float_color_key(texture->resource.format, color_key, float_color_key); 7675 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 7676 ARBFP_BLIT_PARAM_COLOR_KEY_LOW, &float_color_key[0].r)); 7677 
GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 7678 ARBFP_BLIT_PARAM_COLOR_KEY_HIGH, &float_color_key[1].r)); 7679 checkGLcall("glProgramLocalParameter4fvARB"); 7680 } 7681 7682 return WINED3D_OK; 7683 } 7684 7685 /* Context activation is done by the caller. */ 7686 static void arbfp_blit_unset(const struct wined3d_gl_info *gl_info) 7687 { 7688 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 7689 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 7690 } 7691 7692 static BOOL arbfp_blit_supported(enum wined3d_blit_op blit_op, const struct wined3d_context *context, 7693 const struct wined3d_resource *src_resource, DWORD src_location, 7694 const struct wined3d_resource *dst_resource, DWORD dst_location) 7695 { 7696 const struct wined3d_format *src_format = src_resource->format; 7697 const struct wined3d_format *dst_format = dst_resource->format; 7698 enum complex_fixup src_fixup; 7699 BOOL decompress; 7700 7701 if (src_resource->type != WINED3D_RTYPE_TEXTURE_2D) 7702 return FALSE; 7703 7704 if (blit_op == WINED3D_BLIT_OP_RAW_BLIT && dst_format->id == src_format->id) 7705 { 7706 if (dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)) 7707 blit_op = WINED3D_BLIT_OP_DEPTH_BLIT; 7708 else 7709 blit_op = WINED3D_BLIT_OP_COLOR_BLIT; 7710 } 7711 7712 switch (blit_op) 7713 { 7714 case WINED3D_BLIT_OP_COLOR_BLIT_CKEY: 7715 if (!context->d3d_info->shader_color_key) 7716 { 7717 /* The conversion modifies the alpha channel so the color key might no longer match. 
*/ 7718 TRACE("Color keying not supported with converted textures.\n"); 7719 return FALSE; 7720 } 7721 case WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST: 7722 case WINED3D_BLIT_OP_COLOR_BLIT: 7723 break; 7724 7725 default: 7726 TRACE("Unsupported blit_op=%d\n", blit_op); 7727 return FALSE; 7728 } 7729 7730 decompress = src_format && (src_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED) 7731 && !(dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED); 7732 if (!decompress && !(src_resource->access & dst_resource->access & WINED3D_RESOURCE_ACCESS_GPU)) 7733 return FALSE; 7734 7735 src_fixup = get_complex_fixup(src_format->color_fixup); 7736 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d)) 7737 { 7738 TRACE("Checking support for fixup:\n"); 7739 dump_color_fixup_desc(src_format->color_fixup); 7740 } 7741 7742 if (!is_identity_fixup(dst_format->color_fixup) 7743 && (dst_format->id != src_format->id || dst_location != WINED3D_LOCATION_DRAWABLE)) 7744 { 7745 TRACE("Destination fixups are not supported\n"); 7746 return FALSE; 7747 } 7748 7749 if (is_identity_fixup(src_format->color_fixup)) 7750 { 7751 TRACE("[OK]\n"); 7752 return TRUE; 7753 } 7754 7755 /* We only support YUV conversions. 
*/ 7756 if (!is_complex_fixup(src_format->color_fixup)) 7757 { 7758 if (wined3d_settings.offscreen_rendering_mode == ORM_BACKBUFFER) 7759 { 7760 WARN("Claiming fixup support because of ORM_BACKBUFFER.\n"); 7761 return TRUE; 7762 } 7763 7764 TRACE("[FAILED]\n"); 7765 return FALSE; 7766 } 7767 7768 switch(src_fixup) 7769 { 7770 case COMPLEX_FIXUP_YUY2: 7771 case COMPLEX_FIXUP_UYVY: 7772 case COMPLEX_FIXUP_YV12: 7773 case COMPLEX_FIXUP_NV12: 7774 case COMPLEX_FIXUP_P8: 7775 TRACE("[OK]\n"); 7776 return TRUE; 7777 7778 default: 7779 FIXME("Unsupported YUV fixup %#x\n", src_fixup); 7780 TRACE("[FAILED]\n"); 7781 return FALSE; 7782 } 7783 } 7784 7785 static DWORD arbfp_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_blit_op op, 7786 struct wined3d_context *context, struct wined3d_texture *src_texture, unsigned int src_sub_resource_idx, 7787 DWORD src_location, const RECT *src_rect, struct wined3d_texture *dst_texture, 7788 unsigned int dst_sub_resource_idx, DWORD dst_location, const RECT *dst_rect, 7789 const struct wined3d_color_key *color_key, enum wined3d_texture_filter_type filter) 7790 { 7791 struct wined3d_device *device = dst_texture->resource.device; 7792 struct wined3d_texture *staging_texture = NULL; 7793 struct wined3d_arbfp_blitter *arbfp_blitter; 7794 struct wined3d_color_key alpha_test_key; 7795 struct wined3d_blitter *next; 7796 unsigned int src_level; 7797 RECT s, d; 7798 7799 TRACE("blitter %p, op %#x, context %p, src_texture %p, src_sub_resource_idx %u, src_location %s, src_rect %s, " 7800 "dst_texture %p, dst_sub_resource_idx %u, dst_location %s, dst_rect %s, colour_key %p, filter %s.\n", 7801 blitter, op, context, src_texture, src_sub_resource_idx, wined3d_debug_location(src_location), 7802 wine_dbgstr_rect(src_rect), dst_texture, dst_sub_resource_idx, wined3d_debug_location(dst_location), 7803 wine_dbgstr_rect(dst_rect), color_key, debug_d3dtexturefiltertype(filter)); 7804 7805 if (!arbfp_blit_supported(op, context, &src_texture->resource, 
src_location, 7806 &dst_texture->resource, dst_location)) 7807 { 7808 if (!(next = blitter->next)) 7809 { 7810 ERR("No blitter to handle blit op %#x.\n", op); 7811 return dst_location; 7812 } 7813 7814 TRACE("Forwarding to blitter %p.\n", next); 7815 return next->ops->blitter_blit(next, op, context, src_texture, src_sub_resource_idx, src_location, 7816 src_rect, dst_texture, dst_sub_resource_idx, dst_location, dst_rect, color_key, filter); 7817 } 7818 7819 arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter); 7820 7821 if (!(src_texture->resource.access & WINED3D_RESOURCE_ACCESS_GPU)) 7822 { 7823 struct wined3d_resource_desc desc; 7824 struct wined3d_box upload_box; 7825 HRESULT hr; 7826 7827 TRACE("Source texture is not GPU accessible, creating a staging texture.\n"); 7828 7829 src_level = src_sub_resource_idx % src_texture->level_count; 7830 desc.resource_type = WINED3D_RTYPE_TEXTURE_2D; 7831 desc.format = src_texture->resource.format->id; 7832 desc.multisample_type = src_texture->resource.multisample_type; 7833 desc.multisample_quality = src_texture->resource.multisample_quality; 7834 desc.usage = WINED3DUSAGE_PRIVATE; 7835 desc.access = WINED3D_RESOURCE_ACCESS_GPU; 7836 desc.width = wined3d_texture_get_level_width(src_texture, src_level); 7837 desc.height = wined3d_texture_get_level_height(src_texture, src_level); 7838 desc.depth = 1; 7839 desc.size = 0; 7840 7841 if (FAILED(hr = wined3d_texture_create(device, &desc, 1, 1, 0, 7842 NULL, NULL, &wined3d_null_parent_ops, &staging_texture))) 7843 { 7844 ERR("Failed to create staging texture, hr %#x.\n", hr); 7845 return dst_location; 7846 } 7847 7848 wined3d_box_set(&upload_box, 0, 0, desc.width, desc.height, 0, desc.depth); 7849 wined3d_texture_upload_from_texture(staging_texture, 0, 0, 0, 0, 7850 src_texture, src_sub_resource_idx, &upload_box); 7851 7852 src_texture = staging_texture; 7853 src_sub_resource_idx = 0; 7854 } 7855 else if (wined3d_settings.offscreen_rendering_mode != 
ORM_FBO 7856 && (src_texture->sub_resources[src_sub_resource_idx].locations 7857 & (WINED3D_LOCATION_TEXTURE_RGB | WINED3D_LOCATION_DRAWABLE)) == WINED3D_LOCATION_DRAWABLE 7858 && !wined3d_resource_is_offscreen(&src_texture->resource)) 7859 { 7860 7861 /* Without FBO blits transferring from the drawable to the texture is 7862 * expensive, because we have to flip the data in sysmem. Since we can 7863 * flip in the blitter, we don't actually need that flip anyway. So we 7864 * use the surface's texture as scratch texture, and flip the source 7865 * rectangle instead. */ 7866 texture2d_load_fb_texture(src_texture, src_sub_resource_idx, FALSE, context); 7867 7868 s = *src_rect; 7869 src_level = src_sub_resource_idx % src_texture->level_count; 7870 s.top = wined3d_texture_get_level_height(src_texture, src_level) - s.top; 7871 s.bottom = wined3d_texture_get_level_height(src_texture, src_level) - s.bottom; 7872 src_rect = &s; 7873 } 7874 else 7875 { 7876 wined3d_texture_load(src_texture, context, FALSE); 7877 } 7878 7879 context_apply_ffp_blit_state(context, device); 7880 7881 if (dst_location == WINED3D_LOCATION_DRAWABLE) 7882 { 7883 d = *dst_rect; 7884 wined3d_texture_translate_drawable_coords(dst_texture, context->win_handle, &d); 7885 dst_rect = &d; 7886 } 7887 7888 if (wined3d_settings.offscreen_rendering_mode == ORM_FBO) 7889 { 7890 GLenum buffer; 7891 7892 if (dst_location == WINED3D_LOCATION_DRAWABLE) 7893 { 7894 TRACE("Destination texture %p is onscreen.\n", dst_texture); 7895 buffer = wined3d_texture_get_gl_buffer(dst_texture); 7896 } 7897 else 7898 { 7899 TRACE("Destination texture %p is offscreen.\n", dst_texture); 7900 buffer = GL_COLOR_ATTACHMENT0; 7901 } 7902 context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, 7903 &dst_texture->resource, dst_sub_resource_idx, NULL, 0, dst_location); 7904 context_set_draw_buffer(context, buffer); 7905 context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER); 7906 context_invalidate_state(context, STATE_FRAMEBUFFER); 
7907 } 7908 7909 if (op == WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST) 7910 { 7911 const struct wined3d_format *fmt = src_texture->resource.format; 7912 alpha_test_key.color_space_low_value = 0; 7913 alpha_test_key.color_space_high_value = ~(((1u << fmt->alpha_size) - 1) << fmt->alpha_offset); 7914 color_key = &alpha_test_key; 7915 } 7916 7917 arbfp_blit_set(arbfp_blitter, context, src_texture, src_sub_resource_idx, color_key); 7918 7919 /* Draw a textured quad */ 7920 context_draw_textured_quad(context, src_texture, src_sub_resource_idx, src_rect, dst_rect, filter); 7921 7922 /* Leave the opengl state valid for blitting */ 7923 arbfp_blit_unset(context->gl_info); 7924 7925 if (dst_texture->swapchain && (dst_texture->swapchain->front_buffer == dst_texture)) 7926 context->gl_info->gl_ops.gl.p_glFlush(); 7927 7928 if (staging_texture) 7929 wined3d_texture_decref(staging_texture); 7930 7931 return dst_location; 7932 } 7933 7934 static void arbfp_blitter_clear(struct wined3d_blitter *blitter, struct wined3d_device *device, 7935 unsigned int rt_count, const struct wined3d_fb_state *fb, unsigned int rect_count, const RECT *clear_rects, 7936 const RECT *draw_rect, DWORD flags, const struct wined3d_color *colour, float depth, DWORD stencil) 7937 { 7938 struct wined3d_blitter *next; 7939 7940 if ((next = blitter->next)) 7941 next->ops->blitter_clear(next, device, rt_count, fb, rect_count, 7942 clear_rects, draw_rect, flags, colour, depth, stencil); 7943 } 7944 7945 static const struct wined3d_blitter_ops arbfp_blitter_ops = 7946 { 7947 arbfp_blitter_destroy, 7948 arbfp_blitter_clear, 7949 arbfp_blitter_blit, 7950 }; 7951 7952 void wined3d_arbfp_blitter_create(struct wined3d_blitter **next, const struct wined3d_device *device) 7953 { 7954 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 7955 struct wined3d_arbfp_blitter *blitter; 7956 7957 if (device->shader_backend != &arb_program_shader_backend 7958 && device->shader_backend != &glsl_shader_backend) 7959 
return; 7960 7961 if (!gl_info->supported[ARB_FRAGMENT_PROGRAM]) 7962 return; 7963 7964 if (!gl_info->supported[WINED3D_GL_LEGACY_CONTEXT]) 7965 return; 7966 7967 if (!(blitter = heap_alloc(sizeof(*blitter)))) 7968 { 7969 ERR("Failed to allocate blitter.\n"); 7970 return; 7971 } 7972 7973 TRACE("Created blitter %p.\n", blitter); 7974 7975 blitter->blitter.ops = &arbfp_blitter_ops; 7976 blitter->blitter.next = *next; 7977 wine_rb_init(&blitter->shaders, arbfp_blit_type_compare); 7978 blitter->palette_texture = 0; 7979 *next = &blitter->blitter; 7980 } 7981