1 /* 2 * Pixel and vertex shaders implementation using ARB_vertex_program 3 * and ARB_fragment_program GL extensions. 4 * 5 * Copyright 2002-2003 Jason Edmeades 6 * Copyright 2002-2003 Raphael Junqueira 7 * Copyright 2004 Christian Costa 8 * Copyright 2005 Oliver Stieber 9 * Copyright 2006 Ivan Gyurdiev 10 * Copyright 2006 Jason Green 11 * Copyright 2006 Henri Verbeet 12 * Copyright 2007-2011, 2013-2014 Stefan Dösinger for CodeWeavers 13 * Copyright 2009 Henri Verbeet for CodeWeavers 14 * 15 * This library is free software; you can redistribute it and/or 16 * modify it under the terms of the GNU Lesser General Public 17 * License as published by the Free Software Foundation; either 18 * version 2.1 of the License, or (at your option) any later version. 19 * 20 * This library is distributed in the hope that it will be useful, 21 * but WITHOUT ANY WARRANTY; without even the implied warranty of 22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 * Lesser General Public License for more details. 
24 * 25 * You should have received a copy of the GNU Lesser General Public 26 * License along with this library; if not, write to the Free Software 27 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 28 */ 29 30 #include "config.h" 31 #include "wine/port.h" 32 33 #include <stdio.h> 34 35 #include "wined3d_private.h" 36 37 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader); 38 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants); 39 WINE_DECLARE_DEBUG_CHANNEL(d3d); 40 WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); 41 42 static BOOL shader_is_pshader_version(enum wined3d_shader_type type) 43 { 44 return type == WINED3D_SHADER_TYPE_PIXEL; 45 } 46 47 static BOOL shader_is_vshader_version(enum wined3d_shader_type type) 48 { 49 return type == WINED3D_SHADER_TYPE_VERTEX; 50 } 51 52 static const char *get_line(const char **ptr) 53 { 54 const char *p, *q; 55 56 p = *ptr; 57 if (!(q = strstr(p, "\n"))) 58 { 59 if (!*p) return NULL; 60 *ptr += strlen(p); 61 return p; 62 } 63 *ptr = q + 1; 64 65 return p; 66 } 67 68 enum arb_helper_value 69 { 70 ARB_ZERO, 71 ARB_ONE, 72 ARB_TWO, 73 ARB_0001, 74 ARB_EPS, 75 76 ARB_VS_REL_OFFSET 77 }; 78 79 static const char *arb_get_helper_value(enum wined3d_shader_type shader, enum arb_helper_value value) 80 { 81 if (shader != WINED3D_SHADER_TYPE_VERTEX && shader != WINED3D_SHADER_TYPE_PIXEL) 82 { 83 ERR("Unsupported shader type '%s'.\n", debug_shader_type(shader)); 84 return "bad"; 85 } 86 87 if (shader == WINED3D_SHADER_TYPE_PIXEL) 88 { 89 switch (value) 90 { 91 case ARB_ZERO: return "ps_helper_const.x"; 92 case ARB_ONE: return "ps_helper_const.y"; 93 case ARB_TWO: return "coefmul.x"; 94 case ARB_0001: return "ps_helper_const.xxxy"; 95 case ARB_EPS: return "ps_helper_const.z"; 96 default: break; 97 } 98 } 99 else 100 { 101 switch (value) 102 { 103 case ARB_ZERO: return "helper_const.x"; 104 case ARB_ONE: return "helper_const.y"; 105 case ARB_TWO: return "helper_const.z"; 106 case ARB_EPS: return "helper_const.w"; 107 case ARB_0001: return 
"helper_const.xxxy"; 108 case ARB_VS_REL_OFFSET: return "rel_addr_const.y"; 109 } 110 } 111 FIXME("Unmanaged %s shader helper constant requested: %u.\n", 112 shader == WINED3D_SHADER_TYPE_PIXEL ? "pixel" : "vertex", value); 113 switch (value) 114 { 115 case ARB_ZERO: return "0.0"; 116 case ARB_ONE: return "1.0"; 117 case ARB_TWO: return "2.0"; 118 case ARB_0001: return "{0.0, 0.0, 0.0, 1.0}"; 119 case ARB_EPS: return "1e-8"; 120 default: return "bad"; 121 } 122 } 123 124 static inline BOOL ffp_clip_emul(const struct wined3d_context *context) 125 { 126 return context->lowest_disabled_stage < 7; 127 } 128 129 /* ARB_program_shader private data */ 130 131 struct control_frame 132 { 133 struct list entry; 134 enum 135 { 136 IF, 137 IFC, 138 LOOP, 139 REP 140 } type; 141 BOOL muting; 142 BOOL outer_loop; 143 union 144 { 145 unsigned int loop; 146 unsigned int ifc; 147 } no; 148 struct wined3d_shader_loop_control loop_control; 149 BOOL had_else; 150 }; 151 152 struct arb_ps_np2fixup_info 153 { 154 struct ps_np2fixup_info super; 155 /* For ARB we need an offset value: 156 * With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) are stored in a 157 * consecutive way (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone" 158 * array we need an offset to the index inside the program local parameter array. 
*/ 159 UINT offset; 160 }; 161 162 struct arb_ps_compile_args 163 { 164 struct ps_compile_args super; 165 WORD bools; 166 WORD clip; /* only a boolean, use a WORD for alignment */ 167 unsigned char loop_ctrl[WINED3D_MAX_CONSTS_I][3]; 168 }; 169 170 struct stb_const_desc 171 { 172 unsigned char texunit; 173 UINT const_num; 174 }; 175 176 struct arb_ps_compiled_shader 177 { 178 struct arb_ps_compile_args args; 179 struct arb_ps_np2fixup_info np2fixup_info; 180 struct stb_const_desc bumpenvmatconst[MAX_TEXTURES]; 181 struct stb_const_desc luminanceconst[MAX_TEXTURES]; 182 UINT int_consts[WINED3D_MAX_CONSTS_I]; 183 GLuint prgId; 184 UINT ycorrection; 185 unsigned char numbumpenvmatconsts; 186 char num_int_consts; 187 }; 188 189 struct arb_vs_compile_args 190 { 191 struct vs_compile_args super; 192 union 193 { 194 struct 195 { 196 WORD bools; 197 unsigned char clip_texcoord; 198 unsigned char clipplane_mask; 199 } boolclip; 200 DWORD boolclip_compare; 201 } clip; 202 DWORD ps_signature; 203 union 204 { 205 unsigned char samplers[4]; 206 DWORD samplers_compare; 207 } vertex; 208 unsigned char loop_ctrl[WINED3D_MAX_CONSTS_I][3]; 209 }; 210 211 struct arb_vs_compiled_shader 212 { 213 struct arb_vs_compile_args args; 214 GLuint prgId; 215 UINT int_consts[WINED3D_MAX_CONSTS_I]; 216 char num_int_consts; 217 char need_color_unclamp; 218 UINT pos_fixup; 219 }; 220 221 struct recorded_instruction 222 { 223 struct wined3d_shader_instruction ins; 224 struct list entry; 225 }; 226 227 struct shader_arb_ctx_priv 228 { 229 char addr_reg[20]; 230 enum 231 { 232 /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */ 233 ARB, 234 /* GL_NV_vertex_program2_option or GL_NV_fragment_program_option */ 235 NV2, 236 /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */ 237 NV3 238 } target_version; 239 240 const struct arb_vs_compile_args *cur_vs_args; 241 const struct arb_ps_compile_args *cur_ps_args; 242 const struct arb_ps_compiled_shader *compiled_fprog; 243 const struct 
arb_vs_compiled_shader *compiled_vprog; 244 struct arb_ps_np2fixup_info *cur_np2fixup_info; 245 struct list control_frames; 246 struct list record; 247 BOOL recording; 248 BOOL muted; 249 unsigned int num_loops, loop_depth, num_ifcs; 250 int aL; 251 BOOL ps_post_process; 252 253 unsigned int vs_clipplanes; 254 BOOL footer_written; 255 BOOL in_main_func; 256 257 /* For 3.0 vertex shaders */ 258 const char *vs_output[MAX_REG_OUTPUT]; 259 /* For 2.x and earlier vertex shaders */ 260 const char *texcrd_output[8], *color_output[2], *fog_output; 261 262 /* 3.0 pshader input for compatibility with fixed function */ 263 const char *ps_input[MAX_REG_INPUT]; 264 }; 265 266 struct ps_signature 267 { 268 struct wined3d_shader_signature sig; 269 DWORD idx; 270 struct wine_rb_entry entry; 271 }; 272 273 struct arb_pshader_private { 274 struct arb_ps_compiled_shader *gl_shaders; 275 UINT num_gl_shaders, shader_array_size; 276 DWORD input_signature_idx; 277 DWORD clipplane_emulation; 278 BOOL clamp_consts; 279 }; 280 281 struct arb_vshader_private { 282 struct arb_vs_compiled_shader *gl_shaders; 283 UINT num_gl_shaders, shader_array_size; 284 UINT rel_offset; 285 }; 286 287 struct shader_arb_priv 288 { 289 GLuint current_vprogram_id; 290 GLuint current_fprogram_id; 291 const struct arb_ps_compiled_shader *compiled_fprog; 292 const struct arb_vs_compiled_shader *compiled_vprog; 293 BOOL use_arbfp_fixed_func; 294 struct wine_rb_tree fragment_shaders; 295 BOOL last_ps_const_clamped; 296 BOOL last_vs_color_unclamp; 297 298 struct wine_rb_tree signature_tree; 299 DWORD ps_sig_number; 300 301 unsigned int highest_dirty_ps_const, highest_dirty_vs_const; 302 char vshader_const_dirty[WINED3D_MAX_VS_CONSTS_F]; 303 char pshader_const_dirty[WINED3D_MAX_PS_CONSTS_F]; 304 const struct wined3d_context *last_context; 305 306 const struct wined3d_vertex_pipe_ops *vertex_pipe; 307 const struct fragment_pipeline *fragment_pipe; 308 BOOL ffp_proj_control; 309 }; 310 311 /* Context activation for 
state handlers is done by the caller. */ 312 313 static BOOL need_rel_addr_const(const struct arb_vshader_private *shader_data, 314 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 315 { 316 if (shader_data->rel_offset) return TRUE; 317 if (!reg_maps->usesmova) return FALSE; 318 return !gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]; 319 } 320 321 /* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */ 322 static inline BOOL use_nv_clip(const struct wined3d_gl_info *gl_info) 323 { 324 return gl_info->supported[NV_VERTEX_PROGRAM2_OPTION] 325 && !(gl_info->quirks & WINED3D_QUIRK_NV_CLIP_BROKEN); 326 } 327 328 static BOOL need_helper_const(const struct arb_vshader_private *shader_data, 329 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 330 { 331 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) return TRUE; 332 if (!gl_info->supported[NV_VERTEX_PROGRAM]) return TRUE; /* Need to init colors. */ 333 if (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) return TRUE; /* Load the immval offset. */ 334 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) return TRUE; /* Have to init texcoords. */ 335 if (!use_nv_clip(gl_info)) return TRUE; /* Init the clip texcoord */ 336 if (reg_maps->usesnrm) return TRUE; /* 0.0 */ 337 if (reg_maps->usespow) return TRUE; /* EPS, 0.0 and 1.0 */ 338 if (reg_maps->fog) return TRUE; /* Clamping fog coord, 0.0 and 1.0 */ 339 return FALSE; 340 } 341 342 static unsigned int reserved_vs_const(const struct arb_vshader_private *shader_data, 343 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 344 { 345 unsigned int ret = 1; 346 /* We use one PARAM for the pos fixup, and in some cases one to load 347 * some immediate values into the shader. 
*/ 348 if (need_helper_const(shader_data, reg_maps, gl_info)) ++ret; 349 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) ++ret; 350 return ret; 351 } 352 353 /* Loads floating point constants into the currently set ARB_vertex/fragment_program. 354 * When constant_list == NULL, it will load all the constants. 355 * 356 * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders) 357 * or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders) 358 */ 359 /* Context activation is done by the caller. */ 360 static unsigned int shader_arb_load_constants_f(const struct wined3d_shader *shader, 361 const struct wined3d_gl_info *gl_info, GLuint target_type, unsigned int max_constants, 362 const struct wined3d_vec4 *constants, char *dirty_consts) 363 { 364 struct wined3d_shader_lconst *lconst; 365 unsigned int ret, i, j; 366 367 if (TRACE_ON(d3d_constants)) 368 { 369 for (i = 0; i < max_constants; ++i) 370 { 371 if (!dirty_consts[i]) 372 continue; 373 TRACE_(d3d_constants)("Loading constant %u: %s.\n", i, debug_vec4(&constants[i])); 374 } 375 } 376 377 i = 0; 378 379 /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */ 380 if (target_type == GL_FRAGMENT_PROGRAM_ARB && shader->reg_maps.shader_version.major == 1) 381 { 382 float lcl_const[4]; 383 /* ps 1.x supports only 8 constants, clamp only those. 
When switching between 1.x and higher 384 * shaders, the first 8 constants are marked dirty for reload 385 */ 386 for (; i < min(8, max_constants); ++i) 387 { 388 if (!dirty_consts[i]) 389 continue; 390 dirty_consts[i] = 0; 391 392 if (constants[i].x > 1.0f) 393 lcl_const[0] = 1.0f; 394 else if (constants[i].x < -1.0f) 395 lcl_const[0] = -1.0f; 396 else 397 lcl_const[0] = constants[i].x; 398 399 if (constants[i].y > 1.0f) 400 lcl_const[1] = 1.0f; 401 else if (constants[i].y < -1.0f) 402 lcl_const[1] = -1.0f; 403 else 404 lcl_const[1] = constants[i].y; 405 406 if (constants[i].z > 1.0f) 407 lcl_const[2] = 1.0f; 408 else if (constants[i].z < -1.0f) 409 lcl_const[2] = -1.0f; 410 else 411 lcl_const[2] = constants[i].z; 412 413 if (constants[i].w > 1.0f) 414 lcl_const[3] = 1.0f; 415 else if (constants[i].w < -1.0f) 416 lcl_const[3] = -1.0f; 417 else 418 lcl_const[3] = constants[i].w; 419 420 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const)); 421 } 422 423 /* If further constants are dirty, reload them without clamping. 424 * 425 * The alternative is not to touch them, but then we cannot reset the dirty constant count 426 * to zero. 
That's bad for apps that only use PS 1.x shaders, because in that case the code 427 * above would always re-check the first 8 constants since max_constant remains at the init 428 * value 429 */ 430 } 431 432 if (gl_info->supported[EXT_GPU_PROGRAM_PARAMETERS]) 433 { 434 /* TODO: Benchmark if we're better of with finding the dirty constants ourselves, 435 * or just reloading *all* constants at once 436 * 437 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4))); 438 */ 439 for (; i < max_constants; ++i) 440 { 441 if (!dirty_consts[i]) 442 continue; 443 444 /* Find the next block of dirty constants */ 445 dirty_consts[i] = 0; 446 j = i; 447 for (++i; (i < max_constants) && dirty_consts[i]; ++i) 448 { 449 dirty_consts[i] = 0; 450 } 451 452 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, &constants[j].x)); 453 } 454 } 455 else 456 { 457 for (; i < max_constants; ++i) 458 { 459 if (dirty_consts[i]) 460 { 461 dirty_consts[i] = 0; 462 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, &constants[i].x)); 463 } 464 } 465 } 466 checkGLcall("glProgramEnvParameter4fvARB()"); 467 468 /* Load immediate constants */ 469 if (shader->load_local_constsF) 470 { 471 if (TRACE_ON(d3d_shader)) 472 { 473 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 474 { 475 GLfloat* values = (GLfloat*)lconst->value; 476 TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx, 477 values[0], values[1], values[2], values[3]); 478 } 479 } 480 /* Immediate constants are clamped for 1.X shaders at loading times */ 481 ret = 0; 482 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 483 { 484 dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */ 485 ret = max(ret, lconst->idx + 1); 486 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value)); 487 } 488 
checkGLcall("glProgramEnvParameter4fvARB()"); 489 return ret; /* The loaded immediate constants need reloading for the next shader */ 490 } else { 491 return 0; /* No constants are dirty now */ 492 } 493 } 494 495 /* Loads the texture dimensions for NP2 fixup into the currently set 496 * ARB_[vertex/fragment]_programs. */ 497 static void shader_arb_load_np2fixup_constants(const struct arb_ps_np2fixup_info *fixup, 498 const struct wined3d_gl_info *gl_info, const struct wined3d_state *state) 499 { 500 GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS]; 501 WORD active = fixup->super.active; 502 UINT i; 503 504 if (!active) 505 return; 506 507 for (i = 0; active; active >>= 1, ++i) 508 { 509 const struct wined3d_texture *tex = state->textures[i]; 510 unsigned char idx = fixup->super.idx[i]; 511 GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4]; 512 513 if (!(active & 1)) 514 continue; 515 516 if (!tex) 517 { 518 ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n"); 519 continue; 520 } 521 522 if (idx % 2) 523 { 524 tex_dim[2] = tex->pow2_matrix[0]; 525 tex_dim[3] = tex->pow2_matrix[5]; 526 } 527 else 528 { 529 tex_dim[0] = tex->pow2_matrix[0]; 530 tex_dim[1] = tex->pow2_matrix[5]; 531 } 532 } 533 534 for (i = 0; i < fixup->super.num_consts; ++i) 535 { 536 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 537 fixup->offset + i, &np2fixup_constants[i * 4])); 538 } 539 } 540 541 /* Context activation is done by the caller. 
*/ 542 static void shader_arb_ps_local_constants(const struct arb_ps_compiled_shader *gl_shader, 543 const struct wined3d_context *context, const struct wined3d_state *state, UINT rt_height) 544 { 545 const struct wined3d_gl_info *gl_info = context->gl_info; 546 unsigned char i; 547 548 for(i = 0; i < gl_shader->numbumpenvmatconsts; i++) 549 { 550 int texunit = gl_shader->bumpenvmatconst[i].texunit; 551 552 /* The state manager takes care that this function is always called if the bump env matrix changes */ 553 const float *data = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_MAT00]; 554 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 555 gl_shader->bumpenvmatconst[i].const_num, data)); 556 557 if (gl_shader->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED) 558 { 559 /* WINED3D_TSS_BUMPENVLSCALE and WINED3D_TSS_BUMPENVLOFFSET are next to each other. 560 * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we 561 * don't care about them. The pointers are valid for sure because the stateblock is bigger. 562 * (they're WINED3D_TSS_TEXTURETRANSFORMFLAGS and WINED3D_TSS_ADDRESSW, so most likely 0 or NaN 563 */ 564 const float *scale = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_LSCALE]; 565 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 566 gl_shader->luminanceconst[i].const_num, scale)); 567 } 568 } 569 checkGLcall("Load bumpmap consts"); 570 571 if(gl_shader->ycorrection != WINED3D_CONST_NUM_UNUSED) 572 { 573 /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen). 574 * ycorrection.y: -1.0(onscreen), 1.0(offscreen) 575 * ycorrection.z: 1.0 576 * ycorrection.w: 0.0 577 */ 578 float val[4]; 579 val[0] = context->render_offscreen ? 0.0f : (float) rt_height; 580 val[1] = context->render_offscreen ? 
1.0f : -1.0f; 581 val[2] = 1.0f; 582 val[3] = 0.0f; 583 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->ycorrection, val)); 584 checkGLcall("y correction loading"); 585 } 586 587 if (!gl_shader->num_int_consts) return; 588 589 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 590 { 591 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 592 { 593 float val[4]; 594 val[0] = (float)state->ps_consts_i[i].x; 595 val[1] = (float)state->ps_consts_i[i].y; 596 val[2] = (float)state->ps_consts_i[i].z; 597 val[3] = -1.0f; 598 599 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->int_consts[i], val)); 600 } 601 } 602 checkGLcall("Load ps int consts"); 603 } 604 605 /* Context activation is done by the caller. */ 606 static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *gl_shader, 607 const struct wined3d_context *context, const struct wined3d_state *state) 608 { 609 const struct wined3d_gl_info *gl_info = context->gl_info; 610 float position_fixup[4]; 611 unsigned char i; 612 613 /* Upload the position fixup */ 614 shader_get_position_fixup(context, state, position_fixup); 615 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, position_fixup)); 616 617 if (!gl_shader->num_int_consts) return; 618 619 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 620 { 621 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 622 { 623 float val[4]; 624 val[0] = (float)state->vs_consts_i[i].x; 625 val[1] = (float)state->vs_consts_i[i].y; 626 val[2] = (float)state->vs_consts_i[i].z; 627 val[3] = -1.0f; 628 629 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->int_consts[i], val)); 630 } 631 } 632 checkGLcall("Load vs int consts"); 633 } 634 635 static void shader_arb_select(void *shader_priv, struct wined3d_context *context, 636 const struct wined3d_state *state); 637 638 /** 639 * Loads the app-supplied constants into the currently set 
ARB_[vertex/fragment]_programs. 640 * 641 * We only support float constants in ARB at the moment, so don't 642 * worry about the Integers or Booleans 643 */ 644 /* Context activation is done by the caller (state handler). */ 645 static void shader_arb_load_constants_internal(struct shader_arb_priv *priv, 646 struct wined3d_context *context, const struct wined3d_state *state, 647 BOOL usePixelShader, BOOL useVertexShader, BOOL from_shader_select) 648 { 649 const struct wined3d_d3d_info *d3d_info = context->d3d_info; 650 const struct wined3d_gl_info *gl_info = context->gl_info; 651 652 if (!from_shader_select) 653 { 654 const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX]; 655 const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 656 657 if (vshader 658 && (vshader->reg_maps.boolean_constants 659 || (!gl_info->supported[NV_VERTEX_PROGRAM2_OPTION] 660 && (vshader->reg_maps.integer_constants & ~vshader->reg_maps.local_int_consts)))) 661 { 662 TRACE("bool/integer vertex shader constants potentially modified, forcing shader reselection.\n"); 663 shader_arb_select(priv, context, state); 664 } 665 else if (pshader 666 && (pshader->reg_maps.boolean_constants 667 || (!gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION] 668 && (pshader->reg_maps.integer_constants & ~pshader->reg_maps.local_int_consts)))) 669 { 670 TRACE("bool/integer pixel shader constants potentially modified, forcing shader reselection.\n"); 671 shader_arb_select(priv, context, state); 672 } 673 } 674 675 if (context != priv->last_context) 676 { 677 memset(priv->vshader_const_dirty, 1, 678 sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count); 679 priv->highest_dirty_vs_const = d3d_info->limits.vs_uniform_count; 680 681 memset(priv->pshader_const_dirty, 1, 682 sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count); 683 priv->highest_dirty_ps_const = d3d_info->limits.ps_uniform_count; 684 685 priv->last_context = context; 
686 } 687 688 if (useVertexShader) 689 { 690 const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX]; 691 const struct arb_vs_compiled_shader *gl_shader = priv->compiled_vprog; 692 693 /* Load DirectX 9 float constants for vertex shader */ 694 priv->highest_dirty_vs_const = shader_arb_load_constants_f(vshader, gl_info, GL_VERTEX_PROGRAM_ARB, 695 priv->highest_dirty_vs_const, state->vs_consts_f, priv->vshader_const_dirty); 696 shader_arb_vs_local_constants(gl_shader, context, state); 697 } 698 699 if (usePixelShader) 700 { 701 const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 702 const struct arb_ps_compiled_shader *gl_shader = priv->compiled_fprog; 703 UINT rt_height = state->fb->render_targets[0]->height; 704 705 /* Load DirectX 9 float constants for pixel shader */ 706 priv->highest_dirty_ps_const = shader_arb_load_constants_f(pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB, 707 priv->highest_dirty_ps_const, state->ps_consts_f, priv->pshader_const_dirty); 708 shader_arb_ps_local_constants(gl_shader, context, state, rt_height); 709 710 if (context->constant_update_mask & WINED3D_SHADER_CONST_PS_NP2_FIXUP) 711 shader_arb_load_np2fixup_constants(&gl_shader->np2fixup_info, gl_info, state); 712 } 713 } 714 715 static void shader_arb_load_constants(void *shader_priv, struct wined3d_context *context, 716 const struct wined3d_state *state) 717 { 718 BOOL vs = use_vs(state); 719 BOOL ps = use_ps(state); 720 721 shader_arb_load_constants_internal(shader_priv, context, state, ps, vs, FALSE); 722 } 723 724 static void shader_arb_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count) 725 { 726 struct wined3d_context *context = context_get_current(); 727 struct shader_arb_priv *priv = device->shader_priv; 728 729 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 730 * context. 
On a context switch the old context will be fully dirtified */ 731 if (!context || context->device != device) 732 return; 733 734 memset(priv->vshader_const_dirty + start, 1, sizeof(*priv->vshader_const_dirty) * count); 735 priv->highest_dirty_vs_const = max(priv->highest_dirty_vs_const, start + count); 736 } 737 738 static void shader_arb_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) 739 { 740 struct wined3d_context *context = context_get_current(); 741 struct shader_arb_priv *priv = device->shader_priv; 742 743 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 744 * context. On a context switch the old context will be fully dirtified */ 745 if (!context || context->device != device) 746 return; 747 748 memset(priv->pshader_const_dirty + start, 1, sizeof(*priv->pshader_const_dirty) * count); 749 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, start + count); 750 } 751 752 static void shader_arb_append_imm_vec4(struct wined3d_string_buffer *buffer, const float *values) 753 { 754 char str[4][17]; 755 756 wined3d_ftoa(values[0], str[0]); 757 wined3d_ftoa(values[1], str[1]); 758 wined3d_ftoa(values[2], str[2]); 759 wined3d_ftoa(values[3], str[3]); 760 shader_addline(buffer, "{%s, %s, %s, %s}", str[0], str[1], str[2], str[3]); 761 } 762 763 /* Generate the variable & register declarations for the ARB_vertex_program output target */ 764 static void shader_generate_arb_declarations(const struct wined3d_shader *shader, 765 const struct wined3d_shader_reg_maps *reg_maps, struct wined3d_string_buffer *buffer, 766 const struct wined3d_gl_info *gl_info, DWORD *num_clipplanes, 767 const struct shader_arb_ctx_priv *ctx) 768 { 769 DWORD i; 770 char pshader = shader_is_pshader_version(reg_maps->shader_version.type); 771 const struct wined3d_shader_lconst *lconst; 772 unsigned max_constantsF; 773 DWORD map; 774 775 /* In pixel shaders, all private constants are program local, we 
don't need anything 776 * from program.env. Thus we can advertise the full set of constants in pixel shaders. 777 * If we need a private constant the GL implementation will squeeze it in somewhere 778 * 779 * With vertex shaders we need the posFixup and on some GL implementations 4 helper 780 * immediate values. The posFixup is loaded using program.env for now, so always 781 * subtract one from the number of constants. If the shader uses indirect addressing, 782 * account for the helper const too because we have to declare all available d3d constants 783 * and don't know which are actually used. 784 */ 785 if (pshader) 786 { 787 max_constantsF = gl_info->limits.arb_ps_native_constants; 788 /* 24 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. */ 789 if (max_constantsF < 24) 790 max_constantsF = gl_info->limits.arb_ps_float_constants; 791 } 792 else 793 { 794 const struct arb_vshader_private *shader_data = shader->backend_data; 795 max_constantsF = gl_info->limits.arb_vs_native_constants; 796 /* 96 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. 797 * Also prevents max_constantsF from becoming less than 0 and 798 * wrapping . 
*/ 799 if (max_constantsF < 96) 800 max_constantsF = gl_info->limits.arb_vs_float_constants; 801 802 if (reg_maps->usesrelconstF) 803 { 804 DWORD highest_constf = 0, clip_limit; 805 806 max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info); 807 max_constantsF -= wined3d_popcount(reg_maps->integer_constants); 808 max_constantsF -= gl_info->reserved_arb_constants; 809 810 for (i = 0; i < shader->limits->constant_float; ++i) 811 { 812 DWORD idx = i >> 5; 813 DWORD shift = i & 0x1f; 814 if (reg_maps->constf[idx] & (1u << shift)) 815 highest_constf = i; 816 } 817 818 if(use_nv_clip(gl_info) && ctx->target_version >= NV2) 819 { 820 if(ctx->cur_vs_args->super.clip_enabled) 821 clip_limit = gl_info->limits.user_clip_distances; 822 else 823 clip_limit = 0; 824 } 825 else 826 { 827 unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask; 828 clip_limit = min(wined3d_popcount(mask), 4); 829 } 830 *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1); 831 max_constantsF -= *num_clipplanes; 832 if(*num_clipplanes < clip_limit) 833 { 834 WARN("Only %u clip planes out of %u enabled.\n", *num_clipplanes, 835 gl_info->limits.user_clip_distances); 836 } 837 } 838 else 839 { 840 if (ctx->target_version >= NV2) 841 *num_clipplanes = gl_info->limits.user_clip_distances; 842 else 843 *num_clipplanes = min(gl_info->limits.user_clip_distances, 4); 844 } 845 } 846 847 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 848 { 849 if (map & 1) shader_addline(buffer, "TEMP R%u;\n", i); 850 } 851 852 for (i = 0, map = reg_maps->address; map; map >>= 1, ++i) 853 { 854 if (map & 1) shader_addline(buffer, "ADDRESS A%u;\n", i); 855 } 856 857 if (pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3) 858 { 859 for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i) 860 { 861 if (map & 1) shader_addline(buffer, "TEMP T%u;\n", i); 862 } 863 } 864 865 if (!shader->load_local_constsF) 866 { 867 
LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 868 { 869 const float *value; 870 value = (const float *)lconst->value; 871 shader_addline(buffer, "PARAM C%u = ", lconst->idx); 872 shader_arb_append_imm_vec4(buffer, value); 873 shader_addline(buffer, ";\n"); 874 } 875 } 876 877 /* After subtracting privately used constants from the hardware limit(they are loaded as 878 * local constants), make sure the shader doesn't violate the env constant limit 879 */ 880 if (pshader) 881 { 882 max_constantsF = min(max_constantsF, gl_info->limits.arb_ps_float_constants); 883 } 884 else 885 { 886 max_constantsF = min(max_constantsF, gl_info->limits.arb_vs_float_constants); 887 } 888 889 /* Avoid declaring more constants than needed */ 890 max_constantsF = min(max_constantsF, shader->limits->constant_float); 891 892 /* we use the array-based constants array if the local constants are marked for loading, 893 * because then we use indirect addressing, or when the local constant list is empty, 894 * because then we don't know if we're using indirect addressing or not. 
If we're hardcoding 895 * local constants do not declare the loaded constants as an array because ARB compilers usually 896 * do not optimize unused constants away 897 */ 898 if (reg_maps->usesrelconstF) 899 { 900 /* Need to PARAM the environment parameters (constants) so we can use relative addressing */ 901 shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n", 902 max_constantsF, max_constantsF - 1); 903 } 904 else 905 { 906 for (i = 0; i < max_constantsF; ++i) 907 { 908 if (!shader_constant_is_local(shader, i) && wined3d_extract_bits(reg_maps->constf, i, 1)) 909 { 910 shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i); 911 } 912 } 913 } 914 } 915 916 static const char * const shift_tab[] = { 917 "dummy", /* 0 (none) */ 918 "coefmul.x", /* 1 (x2) */ 919 "coefmul.y", /* 2 (x4) */ 920 "coefmul.z", /* 3 (x8) */ 921 "coefmul.w", /* 4 (x16) */ 922 "dummy", /* 5 (x32) */ 923 "dummy", /* 6 (x64) */ 924 "dummy", /* 7 (x128) */ 925 "dummy", /* 8 (d256) */ 926 "dummy", /* 9 (d128) */ 927 "dummy", /* 10 (d64) */ 928 "dummy", /* 11 (d32) */ 929 "coefdiv.w", /* 12 (d16) */ 930 "coefdiv.z", /* 13 (d8) */ 931 "coefdiv.y", /* 14 (d4) */ 932 "coefdiv.x" /* 15 (d2) */ 933 }; 934 935 static void shader_arb_get_write_mask(const struct wined3d_shader_instruction *ins, 936 const struct wined3d_shader_dst_param *dst, char *write_mask) 937 { 938 char *ptr = write_mask; 939 940 if (dst->write_mask != WINED3DSP_WRITEMASK_ALL) 941 { 942 *ptr++ = '.'; 943 if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x'; 944 if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y'; 945 if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z'; 946 if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w'; 947 } 948 949 *ptr = '\0'; 950 } 951 952 static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str) 953 { 954 /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra", 955 * but addressed as "rgba". 
To fix this we need to swap the register's x 956 * and z components. */ 957 const char *swizzle_chars = fixup ? "zyxw" : "xyzw"; 958 char *ptr = swizzle_str; 959 960 /* swizzle bits fields: wwzzyyxx */ 961 DWORD swizzle = param->swizzle; 962 DWORD swizzle_x = swizzle & 0x03; 963 DWORD swizzle_y = (swizzle >> 2) & 0x03; 964 DWORD swizzle_z = (swizzle >> 4) & 0x03; 965 DWORD swizzle_w = (swizzle >> 6) & 0x03; 966 967 /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to 968 * generate a swizzle string. Unless we need to our own swizzling. */ 969 if (swizzle != WINED3DSP_NOSWIZZLE || fixup) 970 { 971 *ptr++ = '.'; 972 if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) { 973 *ptr++ = swizzle_chars[swizzle_x]; 974 } else { 975 *ptr++ = swizzle_chars[swizzle_x]; 976 *ptr++ = swizzle_chars[swizzle_y]; 977 *ptr++ = swizzle_chars[swizzle_z]; 978 *ptr++ = swizzle_chars[swizzle_w]; 979 } 980 } 981 982 *ptr = '\0'; 983 } 984 985 static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src) 986 { 987 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 988 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 989 990 if (!strcmp(priv->addr_reg, src)) return; 991 992 strcpy(priv->addr_reg, src); 993 shader_addline(buffer, "ARL A0.x, %s;\n", src); 994 } 995 996 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 997 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr); 998 999 static void shader_arb_get_register_name(const struct wined3d_shader_instruction *ins, 1000 const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color) 1001 { 1002 /* oPos, oFog and oPts in D3D */ 1003 static const char * const rastout_reg_names[] = {"TMP_OUT", "TMP_FOGCOORD", "result.pointsize"}; 1004 const struct wined3d_shader *shader = ins->ctx->shader; 1005 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 1006 BOOL 
pshader = shader_is_pshader_version(reg_maps->shader_version.type); 1007 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1008 1009 *is_color = FALSE; 1010 1011 switch (reg->type) 1012 { 1013 case WINED3DSPR_TEMP: 1014 sprintf(register_name, "R%u", reg->idx[0].offset); 1015 break; 1016 1017 case WINED3DSPR_INPUT: 1018 if (pshader) 1019 { 1020 if (reg_maps->shader_version.major < 3) 1021 { 1022 if (!reg->idx[0].offset) 1023 strcpy(register_name, "fragment.color.primary"); 1024 else 1025 strcpy(register_name, "fragment.color.secondary"); 1026 } 1027 else 1028 { 1029 if (reg->idx[0].rel_addr) 1030 { 1031 char rel_reg[50]; 1032 shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg); 1033 1034 if (!strcmp(rel_reg, "**aL_emul**")) 1035 { 1036 DWORD idx = ctx->aL + reg->idx[0].offset; 1037 if(idx < MAX_REG_INPUT) 1038 { 1039 strcpy(register_name, ctx->ps_input[idx]); 1040 } 1041 else 1042 { 1043 ERR("Pixel shader input register out of bounds: %u\n", idx); 1044 sprintf(register_name, "out_of_bounds_%u", idx); 1045 } 1046 } 1047 else if (reg_maps->input_registers & 0x0300) 1048 { 1049 /* There are two ways basically: 1050 * 1051 * 1) Use the unrolling code that is used for loop emulation and unroll the loop. 1052 * That means trouble if the loop also contains a breakc or if the control values 1053 * aren't local constants. 1054 * 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the 1055 * source dynamically. The trouble is that we cannot simply read aL.y because it 1056 * is an ADDRESS register. 
We could however push it, load .zw with a value and use 1057 * ADAC to load the condition code register and pop it again afterwards 1058 */ 1059 FIXME("Relative input register addressing with more than 8 registers\n"); 1060 1061 /* This is better than nothing for now */ 1062 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset); 1063 } 1064 else if(ctx->cur_ps_args->super.vp_mode != vertexshader) 1065 { 1066 /* This is problematic because we'd have to consult the ctx->ps_input strings 1067 * for where to find the varying. Some may be "0.0", others can be texcoords or 1068 * colors. This needs either a pipeline replacement to make the vertex shader feed 1069 * proper varyings, or loop unrolling 1070 * 1071 * For now use the texcoords and hope for the best 1072 */ 1073 FIXME("Non-vertex shader varying input with indirect addressing\n"); 1074 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset); 1075 } 1076 else 1077 { 1078 /* D3D supports indirect addressing only with aL in loop registers. The loop instruction 1079 * pulls GL_NV_fragment_program2 in 1080 */ 1081 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset); 1082 } 1083 } 1084 else 1085 { 1086 if (reg->idx[0].offset < MAX_REG_INPUT) 1087 { 1088 strcpy(register_name, ctx->ps_input[reg->idx[0].offset]); 1089 } 1090 else 1091 { 1092 ERR("Pixel shader input register out of bounds: %u\n", reg->idx[0].offset); 1093 sprintf(register_name, "out_of_bounds_%u", reg->idx[0].offset); 1094 } 1095 } 1096 } 1097 } 1098 else 1099 { 1100 if (ctx->cur_vs_args->super.swizzle_map & (1u << reg->idx[0].offset)) 1101 *is_color = TRUE; 1102 sprintf(register_name, "vertex.attrib[%u]", reg->idx[0].offset); 1103 } 1104 break; 1105 1106 case WINED3DSPR_CONST: 1107 if (!pshader && reg->idx[0].rel_addr) 1108 { 1109 const struct arb_vshader_private *shader_data = shader->backend_data; 1110 UINT rel_offset = ctx->target_version == ARB ? 
shader_data->rel_offset : 0; 1111 BOOL aL = FALSE; 1112 char rel_reg[50]; 1113 if (reg_maps->shader_version.major < 2) 1114 { 1115 sprintf(rel_reg, "A0.x"); 1116 } 1117 else 1118 { 1119 shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg); 1120 if (ctx->target_version == ARB) 1121 { 1122 if (!strcmp(rel_reg, "**aL_emul**")) 1123 { 1124 aL = TRUE; 1125 } else { 1126 shader_arb_request_a0(ins, rel_reg); 1127 sprintf(rel_reg, "A0.x"); 1128 } 1129 } 1130 } 1131 if (aL) 1132 sprintf(register_name, "C[%u]", ctx->aL + reg->idx[0].offset); 1133 else if (reg->idx[0].offset >= rel_offset) 1134 sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx[0].offset - rel_offset); 1135 else 1136 sprintf(register_name, "C[%s - %u]", rel_reg, rel_offset - reg->idx[0].offset); 1137 } 1138 else 1139 { 1140 if (reg_maps->usesrelconstF) 1141 sprintf(register_name, "C[%u]", reg->idx[0].offset); 1142 else 1143 sprintf(register_name, "C%u", reg->idx[0].offset); 1144 } 1145 break; 1146 1147 case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */ 1148 if (pshader) 1149 { 1150 if (reg_maps->shader_version.major == 1 1151 && reg_maps->shader_version.minor <= 3) 1152 /* In ps <= 1.3, Tx is a temporary register as destination 1153 * to all instructions, and as source to most instructions. 1154 * For some instructions it is the texcoord input. Those 1155 * instructions know about the special use. */ 1156 sprintf(register_name, "T%u", reg->idx[0].offset); 1157 else 1158 /* In ps 1.4 and 2.x Tx is always a (read-only) varying. 
*/ 1159 sprintf(register_name, "fragment.texcoord[%u]", reg->idx[0].offset); 1160 } 1161 else 1162 { 1163 if (reg_maps->shader_version.major == 1 || ctx->target_version >= NV2) 1164 sprintf(register_name, "A%u", reg->idx[0].offset); 1165 else 1166 sprintf(register_name, "A%u_SHADOW", reg->idx[0].offset); 1167 } 1168 break; 1169 1170 case WINED3DSPR_COLOROUT: 1171 if (ctx->ps_post_process && !reg->idx[0].offset) 1172 { 1173 strcpy(register_name, "TMP_COLOR"); 1174 } 1175 else 1176 { 1177 if (ctx->cur_ps_args->super.srgb_correction) 1178 FIXME("sRGB correction on higher render targets.\n"); 1179 if (reg_maps->rt_mask > 1) 1180 sprintf(register_name, "result.color[%u]", reg->idx[0].offset); 1181 else 1182 strcpy(register_name, "result.color"); 1183 } 1184 break; 1185 1186 case WINED3DSPR_RASTOUT: 1187 if (reg->idx[0].offset == 1) 1188 sprintf(register_name, "%s", ctx->fog_output); 1189 else 1190 sprintf(register_name, "%s", rastout_reg_names[reg->idx[0].offset]); 1191 break; 1192 1193 case WINED3DSPR_DEPTHOUT: 1194 strcpy(register_name, "result.depth"); 1195 break; 1196 1197 case WINED3DSPR_ATTROUT: 1198 /* case WINED3DSPR_OUTPUT: */ 1199 if (pshader) 1200 sprintf(register_name, "oD[%u]", reg->idx[0].offset); 1201 else 1202 strcpy(register_name, ctx->color_output[reg->idx[0].offset]); 1203 break; 1204 1205 case WINED3DSPR_TEXCRDOUT: 1206 if (pshader) 1207 sprintf(register_name, "oT[%u]", reg->idx[0].offset); 1208 else if (reg_maps->shader_version.major < 3) 1209 strcpy(register_name, ctx->texcrd_output[reg->idx[0].offset]); 1210 else 1211 strcpy(register_name, ctx->vs_output[reg->idx[0].offset]); 1212 break; 1213 1214 case WINED3DSPR_LOOP: 1215 if(ctx->target_version >= NV2) 1216 { 1217 /* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */ 1218 if(pshader) sprintf(register_name, "A0.x"); 1219 else sprintf(register_name, "aL.y"); 1220 } 1221 else 1222 { 1223 /* Unfortunately this code cannot return the value of ctx->aL here. 
An immediate value 1224 * would be valid, but if aL is used for indexing(its only use), there's likely an offset, 1225 * thus the result would be something like C[15 + 30], which is not valid in the ARB program 1226 * grammar. So return a marker for the emulated aL and intercept it in constant and varying 1227 * indexing 1228 */ 1229 sprintf(register_name, "**aL_emul**"); 1230 } 1231 1232 break; 1233 1234 case WINED3DSPR_CONSTINT: 1235 sprintf(register_name, "I%u", reg->idx[0].offset); 1236 break; 1237 1238 case WINED3DSPR_MISCTYPE: 1239 if (!reg->idx[0].offset) 1240 sprintf(register_name, "vpos"); 1241 else if (reg->idx[0].offset == 1) 1242 sprintf(register_name, "fragment.facing.x"); 1243 else 1244 FIXME("Unknown MISCTYPE register index %u.\n", reg->idx[0].offset); 1245 break; 1246 1247 default: 1248 FIXME("Unhandled register type %#x[%u].\n", reg->type, reg->idx[0].offset); 1249 sprintf(register_name, "unrecognized_register[%u]", reg->idx[0].offset); 1250 break; 1251 } 1252 } 1253 1254 static void shader_arb_get_dst_param(const struct wined3d_shader_instruction *ins, 1255 const struct wined3d_shader_dst_param *wined3d_dst, char *str) 1256 { 1257 char register_name[255]; 1258 char write_mask[6]; 1259 BOOL is_color; 1260 1261 shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color); 1262 strcpy(str, register_name); 1263 1264 shader_arb_get_write_mask(ins, wined3d_dst, write_mask); 1265 strcat(str, write_mask); 1266 } 1267 1268 static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source) 1269 { 1270 switch(channel_source) 1271 { 1272 case CHANNEL_SOURCE_ZERO: return "0"; 1273 case CHANNEL_SOURCE_ONE: return "1"; 1274 case CHANNEL_SOURCE_X: return "x"; 1275 case CHANNEL_SOURCE_Y: return "y"; 1276 case CHANNEL_SOURCE_Z: return "z"; 1277 case CHANNEL_SOURCE_W: return "w"; 1278 default: 1279 FIXME("Unhandled channel source %#x\n", channel_source); 1280 return "undefined"; 1281 } 1282 } 1283 1284 struct 
color_fixup_masks 1285 { 1286 DWORD source; 1287 DWORD sign; 1288 }; 1289 1290 static struct color_fixup_masks calc_color_correction(struct color_fixup_desc fixup, DWORD dst_mask) 1291 { 1292 struct color_fixup_masks masks = {0, 0}; 1293 1294 if (is_complex_fixup(fixup)) 1295 { 1296 enum complex_fixup complex_fixup = get_complex_fixup(fixup); 1297 FIXME("Complex fixup (%#x) not supported\n", complex_fixup); 1298 return masks; 1299 } 1300 1301 if (fixup.x_source != CHANNEL_SOURCE_X) 1302 masks.source |= WINED3DSP_WRITEMASK_0; 1303 if (fixup.y_source != CHANNEL_SOURCE_Y) 1304 masks.source |= WINED3DSP_WRITEMASK_1; 1305 if (fixup.z_source != CHANNEL_SOURCE_Z) 1306 masks.source |= WINED3DSP_WRITEMASK_2; 1307 if (fixup.w_source != CHANNEL_SOURCE_W) 1308 masks.source |= WINED3DSP_WRITEMASK_3; 1309 masks.source &= dst_mask; 1310 1311 if (fixup.x_sign_fixup) 1312 masks.sign |= WINED3DSP_WRITEMASK_0; 1313 if (fixup.y_sign_fixup) 1314 masks.sign |= WINED3DSP_WRITEMASK_1; 1315 if (fixup.z_sign_fixup) 1316 masks.sign |= WINED3DSP_WRITEMASK_2; 1317 if (fixup.w_sign_fixup) 1318 masks.sign |= WINED3DSP_WRITEMASK_3; 1319 masks.sign &= dst_mask; 1320 1321 return masks; 1322 } 1323 1324 static void gen_color_correction(struct wined3d_string_buffer *buffer, const char *dst, 1325 const char *src, const char *one, const char *two, 1326 struct color_fixup_desc fixup, struct color_fixup_masks masks) 1327 { 1328 const char *sign_fixup_src = dst; 1329 1330 if (masks.source) 1331 { 1332 if (masks.sign) 1333 sign_fixup_src = "TA"; 1334 1335 shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", sign_fixup_src, src, 1336 shader_arb_get_fixup_swizzle(fixup.x_source), shader_arb_get_fixup_swizzle(fixup.y_source), 1337 shader_arb_get_fixup_swizzle(fixup.z_source), shader_arb_get_fixup_swizzle(fixup.w_source)); 1338 } 1339 else if (masks.sign) 1340 { 1341 sign_fixup_src = src; 1342 } 1343 1344 if (masks.sign) 1345 { 1346 char reg_mask[6]; 1347 char *ptr = reg_mask; 1348 1349 if (masks.sign != 
WINED3DSP_WRITEMASK_ALL) 1350 { 1351 *ptr++ = '.'; 1352 if (masks.sign & WINED3DSP_WRITEMASK_0) 1353 *ptr++ = 'x'; 1354 if (masks.sign & WINED3DSP_WRITEMASK_1) 1355 *ptr++ = 'y'; 1356 if (masks.sign & WINED3DSP_WRITEMASK_2) 1357 *ptr++ = 'z'; 1358 if (masks.sign & WINED3DSP_WRITEMASK_3) 1359 *ptr++ = 'w'; 1360 } 1361 *ptr = '\0'; 1362 1363 shader_addline(buffer, "MAD %s%s, %s, %s, -%s;\n", dst, reg_mask, sign_fixup_src, two, one); 1364 } 1365 } 1366 1367 static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins) 1368 { 1369 DWORD mod; 1370 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1371 if (!ins->dst_count) return ""; 1372 1373 mod = ins->dst[0].modifiers; 1374 1375 /* Silently ignore PARTIALPRECISION if it's not supported */ 1376 if(priv->target_version == ARB) mod &= ~WINED3DSPDM_PARTIALPRECISION; 1377 1378 if(mod & WINED3DSPDM_MSAMPCENTROID) 1379 { 1380 FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n"); 1381 mod &= ~WINED3DSPDM_MSAMPCENTROID; 1382 } 1383 1384 switch(mod) 1385 { 1386 case WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION: 1387 return "H_SAT"; 1388 1389 case WINED3DSPDM_SATURATE: 1390 return "_SAT"; 1391 1392 case WINED3DSPDM_PARTIALPRECISION: 1393 return "H"; 1394 1395 case 0: 1396 return ""; 1397 1398 default: 1399 FIXME("Unknown modifiers 0x%08x\n", mod); 1400 return ""; 1401 } 1402 } 1403 1404 #define TEX_PROJ 0x1 1405 #define TEX_BIAS 0x2 1406 #define TEX_LOD 0x4 1407 #define TEX_DERIV 0x10 1408 1409 static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx, 1410 const char *dst_str, const char *coord_reg, WORD flags, const char *dsx, const char *dsy) 1411 { 1412 enum wined3d_shader_resource_type resource_type = ins->ctx->reg_maps->resource_info[sampler_idx].type; 1413 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1414 const char *tex_type; 1415 BOOL np2_fixup = FALSE; 1416 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1417 const 
char *mod; 1418 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 1419 const struct wined3d_shader *shader; 1420 const struct wined3d_device *device; 1421 const struct wined3d_gl_info *gl_info; 1422 const char *tex_dst = dst_str; 1423 struct color_fixup_masks masks; 1424 1425 /* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */ 1426 if(!pshader) sampler_idx += MAX_FRAGMENT_SAMPLERS; 1427 1428 switch (resource_type) 1429 { 1430 case WINED3D_SHADER_RESOURCE_TEXTURE_1D: 1431 tex_type = "1D"; 1432 break; 1433 1434 case WINED3D_SHADER_RESOURCE_TEXTURE_2D: 1435 shader = ins->ctx->shader; 1436 device = shader->device; 1437 gl_info = &device->adapter->gl_info; 1438 1439 if (pshader && priv->cur_ps_args->super.np2_fixup & (1u << sampler_idx) 1440 && gl_info->supported[ARB_TEXTURE_RECTANGLE]) 1441 tex_type = "RECT"; 1442 else 1443 tex_type = "2D"; 1444 if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) 1445 { 1446 if (priv->cur_np2fixup_info->super.active & (1u << sampler_idx)) 1447 { 1448 if (flags) FIXME("Only ordinary sampling from NP2 textures is supported.\n"); 1449 else np2_fixup = TRUE; 1450 } 1451 } 1452 break; 1453 1454 case WINED3D_SHADER_RESOURCE_TEXTURE_3D: 1455 tex_type = "3D"; 1456 break; 1457 1458 case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE: 1459 tex_type = "CUBE"; 1460 break; 1461 1462 default: 1463 ERR("Unexpected resource type %#x.\n", resource_type); 1464 tex_type = ""; 1465 } 1466 1467 /* TEX, TXL, TXD and TXP do not support the "H" modifier, 1468 * so don't use shader_arb_get_modifier 1469 */ 1470 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) mod = "_SAT"; 1471 else mod = ""; 1472 1473 /* Fragment samplers always have indentity mapping */ 1474 if(sampler_idx >= MAX_FRAGMENT_SAMPLERS) 1475 { 1476 sampler_idx = priv->cur_vs_args->vertex.samplers[sampler_idx - MAX_FRAGMENT_SAMPLERS]; 1477 } 1478 1479 if (pshader) 1480 { 1481 masks = 
calc_color_correction(priv->cur_ps_args->super.color_fixup[sampler_idx], 1482 ins->dst[0].write_mask); 1483 1484 if (masks.source || masks.sign) 1485 tex_dst = "TA"; 1486 } 1487 1488 if (flags & TEX_DERIV) 1489 { 1490 if(flags & TEX_PROJ) FIXME("Projected texture sampling with custom derivatives\n"); 1491 if(flags & TEX_BIAS) FIXME("Biased texture sampling with custom derivatives\n"); 1492 shader_addline(buffer, "TXD%s %s, %s, %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, 1493 dsx, dsy, sampler_idx, tex_type); 1494 } 1495 else if(flags & TEX_LOD) 1496 { 1497 if(flags & TEX_PROJ) FIXME("Projected texture sampling with explicit lod\n"); 1498 if(flags & TEX_BIAS) FIXME("Biased texture sampling with explicit lod\n"); 1499 shader_addline(buffer, "TXL%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, 1500 sampler_idx, tex_type); 1501 } 1502 else if (flags & TEX_BIAS) 1503 { 1504 /* Shouldn't be possible, but let's check for it */ 1505 if(flags & TEX_PROJ) FIXME("Biased and Projected texture sampling\n"); 1506 /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */ 1507 shader_addline(buffer, "TXB%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type); 1508 } 1509 else if (flags & TEX_PROJ) 1510 { 1511 shader_addline(buffer, "TXP%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type); 1512 } 1513 else 1514 { 1515 if (np2_fixup) 1516 { 1517 const unsigned char idx = priv->cur_np2fixup_info->super.idx[sampler_idx]; 1518 shader_addline(buffer, "MUL TA, np2fixup[%u].%s, %s;\n", idx >> 1, 1519 (idx % 2) ? 
"zwxy" : "xyzw", coord_reg); 1520 1521 shader_addline(buffer, "TEX%s %s, TA, texture[%u], %s;\n", mod, tex_dst, sampler_idx, tex_type); 1522 } 1523 else 1524 shader_addline(buffer, "TEX%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type); 1525 } 1526 1527 if (pshader) 1528 { 1529 gen_color_correction(buffer, dst_str, tex_dst, 1530 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_ONE), 1531 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_TWO), 1532 priv->cur_ps_args->super.color_fixup[sampler_idx], masks); 1533 } 1534 } 1535 1536 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 1537 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr) 1538 { 1539 /* Generate a line that does the input modifier computation and return the input register to use */ 1540 BOOL is_color = FALSE, insert_line; 1541 char regstr[256]; 1542 char swzstr[20]; 1543 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1544 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1545 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 1546 const char *two = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_TWO); 1547 1548 /* Assume a new line will be added */ 1549 insert_line = TRUE; 1550 1551 /* Get register name */ 1552 shader_arb_get_register_name(ins, &src->reg, regstr, &is_color); 1553 shader_arb_get_swizzle(src, is_color, swzstr); 1554 1555 switch (src->modifiers) 1556 { 1557 case WINED3DSPSM_NONE: 1558 sprintf(outregstr, "%s%s", regstr, swzstr); 1559 insert_line = FALSE; 1560 break; 1561 case WINED3DSPSM_NEG: 1562 sprintf(outregstr, "-%s%s", regstr, swzstr); 1563 insert_line = FALSE; 1564 break; 1565 case WINED3DSPSM_BIAS: 1566 shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr); 1567 break; 1568 case WINED3DSPSM_BIASNEG: 1569 shader_addline(buffer, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg, regstr); 1570 break; 
1571 case WINED3DSPSM_SIGN: 1572 shader_addline(buffer, "MAD T%c, %s, %s, -%s;\n", 'A' + tmpreg, regstr, two, one); 1573 break; 1574 case WINED3DSPSM_SIGNNEG: 1575 shader_addline(buffer, "MAD T%c, %s, -%s, %s;\n", 'A' + tmpreg, regstr, two, one); 1576 break; 1577 case WINED3DSPSM_COMP: 1578 shader_addline(buffer, "SUB T%c, %s, %s;\n", 'A' + tmpreg, one, regstr); 1579 break; 1580 case WINED3DSPSM_X2: 1581 shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr); 1582 break; 1583 case WINED3DSPSM_X2NEG: 1584 shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr); 1585 break; 1586 case WINED3DSPSM_DZ: 1587 shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr); 1588 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 1589 break; 1590 case WINED3DSPSM_DW: 1591 shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr); 1592 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 1593 break; 1594 case WINED3DSPSM_ABS: 1595 if(ctx->target_version >= NV2) { 1596 sprintf(outregstr, "|%s%s|", regstr, swzstr); 1597 insert_line = FALSE; 1598 } else { 1599 shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr); 1600 } 1601 break; 1602 case WINED3DSPSM_ABSNEG: 1603 if(ctx->target_version >= NV2) { 1604 sprintf(outregstr, "-|%s%s|", regstr, swzstr); 1605 } else { 1606 shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr); 1607 sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr); 1608 } 1609 insert_line = FALSE; 1610 break; 1611 default: 1612 sprintf(outregstr, "%s%s", regstr, swzstr); 1613 insert_line = FALSE; 1614 } 1615 1616 /* Return modified or original register, with swizzle */ 1617 if (insert_line) 1618 sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr); 1619 } 1620 1621 static void pshader_hw_bem(const struct wined3d_shader_instruction *ins) 1622 { 1623 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1624 struct wined3d_string_buffer 
*buffer = ins->ctx->buffer; 1625 DWORD sampler_code = dst->reg.idx[0].offset; 1626 char dst_name[50]; 1627 char src_name[2][50]; 1628 1629 shader_arb_get_dst_param(ins, dst, dst_name); 1630 1631 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 1632 * 1633 * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid 1634 * with bem. So delay loading the first parameter until after the perturbation calculation which needs two 1635 * temps is done. 1636 */ 1637 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1638 shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code); 1639 shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]); 1640 shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code); 1641 shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]); 1642 1643 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1644 shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]); 1645 } 1646 1647 static DWORD negate_modifiers(DWORD mod, char *extra_char) 1648 { 1649 *extra_char = ' '; 1650 switch(mod) 1651 { 1652 case WINED3DSPSM_NONE: return WINED3DSPSM_NEG; 1653 case WINED3DSPSM_NEG: return WINED3DSPSM_NONE; 1654 case WINED3DSPSM_BIAS: return WINED3DSPSM_BIASNEG; 1655 case WINED3DSPSM_BIASNEG: return WINED3DSPSM_BIAS; 1656 case WINED3DSPSM_SIGN: return WINED3DSPSM_SIGNNEG; 1657 case WINED3DSPSM_SIGNNEG: return WINED3DSPSM_SIGN; 1658 case WINED3DSPSM_COMP: *extra_char = '-'; return WINED3DSPSM_COMP; 1659 case WINED3DSPSM_X2: return WINED3DSPSM_X2NEG; 1660 case WINED3DSPSM_X2NEG: return WINED3DSPSM_X2; 1661 case WINED3DSPSM_DZ: *extra_char = '-'; return WINED3DSPSM_DZ; 1662 case WINED3DSPSM_DW: *extra_char = '-'; return WINED3DSPSM_DW; 1663 case WINED3DSPSM_ABS: return WINED3DSPSM_ABSNEG; 1664 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 1665 } 1666 FIXME("Unknown modifier %u\n", mod); 1667 
return mod; 1668 } 1669 1670 static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins) 1671 { 1672 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1673 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1674 char dst_name[50]; 1675 char src_name[3][50]; 1676 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 1677 ins->ctx->reg_maps->shader_version.minor); 1678 1679 shader_arb_get_dst_param(ins, dst, dst_name); 1680 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1681 1682 if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue 1683 && ins->dst->write_mask != WINED3DSP_WRITEMASK_3) 1684 { 1685 shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]); 1686 } 1687 else 1688 { 1689 struct wined3d_shader_src_param src0_copy = ins->src[0]; 1690 char extra_neg; 1691 1692 /* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */ 1693 src0_copy.modifiers = negate_modifiers(src0_copy.modifiers, &extra_neg); 1694 1695 shader_arb_get_src_param(ins, &src0_copy, 0, src_name[0]); 1696 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 1697 shader_addline(buffer, "ADD TA, %c%s, coefdiv.x;\n", extra_neg, src_name[0]); 1698 shader_addline(buffer, "CMP%s %s, TA, %s, %s;\n", shader_arb_get_modifier(ins), 1699 dst_name, src_name[1], src_name[2]); 1700 } 1701 } 1702 1703 static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins) 1704 { 1705 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1706 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1707 char dst_name[50]; 1708 char src_name[3][50]; 1709 1710 shader_arb_get_dst_param(ins, dst, dst_name); 1711 1712 /* Generate input register names (with modifiers) */ 1713 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1714 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1715 shader_arb_get_src_param(ins, &ins->src[2], 2, 
src_name[2]); 1716 1717 shader_addline(buffer, "CMP%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1718 dst_name, src_name[0], src_name[2], src_name[1]); 1719 } 1720 1721 /** Process the WINED3DSIO_DP2ADD instruction in ARB. 1722 * dst = dot2(src0, src1) + src2 */ 1723 static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins) 1724 { 1725 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1726 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1727 char dst_name[50]; 1728 char src_name[3][50]; 1729 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1730 1731 shader_arb_get_dst_param(ins, dst, dst_name); 1732 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1733 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 1734 1735 if(ctx->target_version >= NV3) 1736 { 1737 /* GL_NV_fragment_program2 has a 1:1 matching instruction */ 1738 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1739 shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1740 dst_name, src_name[0], src_name[1], src_name[2]); 1741 } 1742 else if(ctx->target_version >= NV2) 1743 { 1744 /* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y 1745 * dst.y = src2.?, src0.x, src1.z + src0.y * src1.w 1746 * dst.z = src2.?, src0.x, src1.x + src0.y * src1.y 1747 * dst.z = src2.?, src0.x, src1.z + src0.y * src1.w 1748 * 1749 * Make sure that src1.zw = src1.xy, then we get a classic dp2add 1750 * 1751 * .xyxy and other swizzles that we could get with this are not valid in 1752 * plain ARBfp, but luckily the NV extension grammar lifts this limitation. 
1753 */ 1754 struct wined3d_shader_src_param tmp_param = ins->src[1]; 1755 DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */ 1756 tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */ 1757 1758 shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]); 1759 1760 shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1761 dst_name, src_name[2], src_name[0], src_name[1]); 1762 } 1763 else 1764 { 1765 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1766 /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2] 1767 * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite 1768 */ 1769 shader_addline(buffer, "MOV TA, %s;\n", src_name[0]); 1770 shader_addline(buffer, "MOV TA.z, 0.0;\n"); 1771 shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]); 1772 shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]); 1773 } 1774 } 1775 1776 /* Map the opcode 1-to-1 to the GL code */ 1777 static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins) 1778 { 1779 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1780 const char *instruction; 1781 char arguments[256], dst_str[50]; 1782 unsigned int i; 1783 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1784 1785 switch (ins->handler_idx) 1786 { 1787 case WINED3DSIH_ABS: instruction = "ABS"; break; 1788 case WINED3DSIH_ADD: instruction = "ADD"; break; 1789 case WINED3DSIH_CRS: instruction = "XPD"; break; 1790 case WINED3DSIH_DP3: instruction = "DP3"; break; 1791 case WINED3DSIH_DP4: instruction = "DP4"; break; 1792 case WINED3DSIH_DST: instruction = "DST"; break; 1793 case WINED3DSIH_FRC: instruction = "FRC"; break; 1794 case WINED3DSIH_LIT: instruction = "LIT"; break; 1795 case WINED3DSIH_LRP: instruction = "LRP"; break; 1796 case WINED3DSIH_MAD: instruction = "MAD"; break; 1797 case WINED3DSIH_MAX: instruction = "MAX"; 
break; 1798 case WINED3DSIH_MIN: instruction = "MIN"; break; 1799 case WINED3DSIH_MOV: instruction = "MOV"; break; 1800 case WINED3DSIH_MUL: instruction = "MUL"; break; 1801 case WINED3DSIH_SGE: instruction = "SGE"; break; 1802 case WINED3DSIH_SLT: instruction = "SLT"; break; 1803 case WINED3DSIH_SUB: instruction = "SUB"; break; 1804 case WINED3DSIH_MOVA:instruction = "ARR"; break; 1805 case WINED3DSIH_DSX: instruction = "DDX"; break; 1806 default: instruction = ""; 1807 FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 1808 break; 1809 } 1810 1811 /* Note that shader_arb_add_dst_param() adds spaces. */ 1812 arguments[0] = '\0'; 1813 shader_arb_get_dst_param(ins, dst, dst_str); 1814 for (i = 0; i < ins->src_count; ++i) 1815 { 1816 char operand[100]; 1817 strcat(arguments, ", "); 1818 shader_arb_get_src_param(ins, &ins->src[i], i, operand); 1819 strcat(arguments, operand); 1820 } 1821 shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments); 1822 } 1823 1824 static void shader_hw_nop(const struct wined3d_shader_instruction *ins) {} 1825 1826 static DWORD shader_arb_select_component(DWORD swizzle, DWORD component) 1827 { 1828 return ((swizzle >> 2 * component) & 0x3) * 0x55; 1829 } 1830 1831 static void shader_hw_mov(const struct wined3d_shader_instruction *ins) 1832 { 1833 const struct wined3d_shader *shader = ins->ctx->shader; 1834 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 1835 BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type); 1836 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1837 const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO); 1838 const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE); 1839 const char *two = arb_get_helper_value(reg_maps->shader_version.type, ARB_TWO); 1840 1841 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1842 char src0_param[256]; 
1843 1844 if (ins->handler_idx == WINED3DSIH_MOVA) 1845 { 1846 const struct arb_vshader_private *shader_data = shader->backend_data; 1847 char write_mask[6]; 1848 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 1849 1850 if(ctx->target_version >= NV2) { 1851 shader_hw_map2gl(ins); 1852 return; 1853 } 1854 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param); 1855 shader_arb_get_write_mask(ins, &ins->dst[0], write_mask); 1856 1857 /* This implements the mova formula used in GLSL. The first two instructions 1858 * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0 1859 * in this case: 1860 * mova A0.x, 0.0 1861 * 1862 * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor 1863 * 1864 * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into 1865 * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign. 1866 */ 1867 shader_addline(buffer, "SGE A0_SHADOW%s, %s, %s;\n", write_mask, src0_param, zero); 1868 shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, %s, -%s;\n", write_mask, two, one); 1869 1870 shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param); 1871 shader_addline(buffer, "ADD TA%s, TA, rel_addr_const.x;\n", write_mask); 1872 shader_addline(buffer, "FLR TA%s, TA;\n", write_mask); 1873 if (shader_data->rel_offset) 1874 { 1875 shader_addline(buffer, "ADD TA%s, TA, %s;\n", write_mask, offset); 1876 } 1877 shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask); 1878 1879 ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0'; 1880 } 1881 else if (reg_maps->shader_version.major == 1 1882 && !shader_is_pshader_version(reg_maps->shader_version.type) 1883 && ins->dst[0].reg.type == WINED3DSPR_ADDR) 1884 { 1885 const struct arb_vshader_private *shader_data = shader->backend_data; 1886 src0_param[0] = '\0'; 1887 1888 if (shader_data->rel_offset && 
ctx->target_version == ARB) 1889 { 1890 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 1891 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param); 1892 shader_addline(buffer, "ADD TA.x, %s, %s;\n", src0_param, offset); 1893 shader_addline(buffer, "ARL A0.x, TA.x;\n"); 1894 } 1895 else 1896 { 1897 /* Apple's ARB_vertex_program implementation does not accept an ARL source argument 1898 * with more than one component. Thus replicate the first source argument over all 1899 * 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */ 1900 struct wined3d_shader_src_param tmp_src = ins->src[0]; 1901 tmp_src.swizzle = shader_arb_select_component(tmp_src.swizzle, 0); 1902 shader_arb_get_src_param(ins, &tmp_src, 0, src0_param); 1903 shader_addline(buffer, "ARL A0.x, %s;\n", src0_param); 1904 } 1905 } 1906 else if (ins->dst[0].reg.type == WINED3DSPR_COLOROUT && !ins->dst[0].reg.idx[0].offset && pshader) 1907 { 1908 if (ctx->ps_post_process && shader->u.ps.color0_mov) 1909 { 1910 shader_addline(buffer, "#mov handled in srgb write or fog code\n"); 1911 return; 1912 } 1913 shader_hw_map2gl(ins); 1914 } 1915 else 1916 { 1917 shader_hw_map2gl(ins); 1918 } 1919 } 1920 1921 static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins) 1922 { 1923 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1924 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1925 char reg_dest[40]; 1926 1927 /* No swizzles are allowed in d3d's texkill. 
PS 1.x ignores the 4th component as documented, 1928 * but >= 2.0 honors it (undocumented, but tested by the d3d9 testsuite) 1929 */ 1930 shader_arb_get_dst_param(ins, dst, reg_dest); 1931 1932 if (ins->ctx->reg_maps->shader_version.major >= 2) 1933 { 1934 const char *kilsrc = "TA"; 1935 BOOL is_color; 1936 1937 shader_arb_get_register_name(ins, &dst->reg, reg_dest, &is_color); 1938 if(dst->write_mask == WINED3DSP_WRITEMASK_ALL) 1939 { 1940 kilsrc = reg_dest; 1941 } 1942 else 1943 { 1944 /* Sigh. KIL doesn't support swizzles/writemasks. KIL passes a writemask, but ".xy" for example 1945 * is not valid as a swizzle in ARB (needs ".xyyy"). Use SWZ to load the register properly, and set 1946 * masked out components to 0(won't kill) 1947 */ 1948 char x = '0', y = '0', z = '0', w = '0'; 1949 if(dst->write_mask & WINED3DSP_WRITEMASK_0) x = 'x'; 1950 if(dst->write_mask & WINED3DSP_WRITEMASK_1) y = 'y'; 1951 if(dst->write_mask & WINED3DSP_WRITEMASK_2) z = 'z'; 1952 if(dst->write_mask & WINED3DSP_WRITEMASK_3) w = 'w'; 1953 shader_addline(buffer, "SWZ TA, %s, %c, %c, %c, %c;\n", reg_dest, x, y, z, w); 1954 } 1955 shader_addline(buffer, "KIL %s;\n", kilsrc); 1956 } 1957 else 1958 { 1959 /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component, 1960 * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL 1961 * 1962 * ps_1_3 shaders use the texcoord incarnation of the Tx register. 
ps_1_4 shaders can use the same, 1963 * or pass in any temporary register(in shader phase 2) 1964 */ 1965 if (ins->ctx->reg_maps->shader_version.minor <= 3) 1966 sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx[0].offset); 1967 else 1968 shader_arb_get_dst_param(ins, dst, reg_dest); 1969 shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest); 1970 shader_addline(buffer, "KIL TA;\n"); 1971 } 1972 } 1973 1974 static void pshader_hw_tex(const struct wined3d_shader_instruction *ins) 1975 { 1976 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1977 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1978 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 1979 ins->ctx->reg_maps->shader_version.minor); 1980 struct wined3d_shader_src_param src; 1981 1982 char reg_dest[40]; 1983 char reg_coord[40]; 1984 DWORD reg_sampler_code; 1985 WORD myflags = 0; 1986 BOOL swizzle_coord = FALSE; 1987 1988 /* All versions have a destination register */ 1989 shader_arb_get_dst_param(ins, dst, reg_dest); 1990 1991 /* 1.0-1.4: Use destination register number as texture code. 1992 2.0+: Use provided sampler number as texture code. */ 1993 if (shader_version < WINED3D_SHADER_VERSION(2,0)) 1994 reg_sampler_code = dst->reg.idx[0].offset; 1995 else 1996 reg_sampler_code = ins->src[1].reg.idx[0].offset; 1997 1998 /* 1.0-1.3: Use the texcoord varying. 1999 1.4+: Use provided coordinate source register. 
*/ 2000 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 2001 sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code); 2002 else { 2003 /* TEX is the only instruction that can handle DW and DZ natively */ 2004 src = ins->src[0]; 2005 if(src.modifiers == WINED3DSPSM_DW) src.modifiers = WINED3DSPSM_NONE; 2006 if(src.modifiers == WINED3DSPSM_DZ) src.modifiers = WINED3DSPSM_NONE; 2007 shader_arb_get_src_param(ins, &src, 0, reg_coord); 2008 } 2009 2010 /* projection flag: 2011 * 1.1, 1.2, 1.3: Use WINED3D_TSS_TEXTURETRANSFORMFLAGS 2012 * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0] 2013 * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode 2014 */ 2015 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 2016 { 2017 DWORD flags = 0; 2018 if (reg_sampler_code < MAX_TEXTURES) 2019 flags = priv->cur_ps_args->super.tex_transform >> reg_sampler_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 2020 if (flags & WINED3D_PSARGS_PROJECTED) 2021 { 2022 myflags |= TEX_PROJ; 2023 if ((flags & ~WINED3D_PSARGS_PROJECTED) == WINED3D_TTFF_COUNT3) 2024 swizzle_coord = TRUE; 2025 } 2026 } 2027 else if (shader_version < WINED3D_SHADER_VERSION(2,0)) 2028 { 2029 enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers; 2030 if (src_mod == WINED3DSPSM_DZ) 2031 { 2032 swizzle_coord = TRUE; 2033 myflags |= TEX_PROJ; 2034 } else if(src_mod == WINED3DSPSM_DW) { 2035 myflags |= TEX_PROJ; 2036 } 2037 } else { 2038 if (ins->flags & WINED3DSI_TEXLD_PROJECT) myflags |= TEX_PROJ; 2039 if (ins->flags & WINED3DSI_TEXLD_BIAS) myflags |= TEX_BIAS; 2040 } 2041 2042 if (swizzle_coord) 2043 { 2044 /* TXP cannot handle DZ natively, so move the z coordinate to .w. 
2045 * reg_coord is a read-only varying register, so we need a temp reg */ 2046 shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord); 2047 strcpy(reg_coord, "TA"); 2048 } 2049 2050 shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, myflags, NULL, NULL); 2051 } 2052 2053 static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins) 2054 { 2055 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2056 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2057 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 2058 ins->ctx->reg_maps->shader_version.minor); 2059 char dst_str[50]; 2060 2061 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 2062 { 2063 DWORD reg = dst->reg.idx[0].offset; 2064 2065 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2066 shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg); 2067 } else { 2068 char reg_src[40]; 2069 2070 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src); 2071 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2072 shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src); 2073 } 2074 } 2075 2076 static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins) 2077 { 2078 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2079 DWORD flags = 0; 2080 2081 DWORD reg1 = ins->dst[0].reg.idx[0].offset; 2082 char dst_str[50]; 2083 char src_str[50]; 2084 2085 /* Note that texreg2ar treats Tx as a temporary register, not as a varying */ 2086 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2087 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2088 /* Move .x first in case src_str is "TA" */ 2089 shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str); 2090 shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str); 2091 if (reg1 < MAX_TEXTURES) 2092 { 2093 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2094 flags = priv->cur_ps_args->super.tex_transform >> reg1 * 
WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 2095 } 2096 shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2097 } 2098 2099 static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins) 2100 { 2101 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2102 2103 DWORD reg1 = ins->dst[0].reg.idx[0].offset; 2104 char dst_str[50]; 2105 char src_str[50]; 2106 2107 /* Note that texreg2gb treats Tx as a temporary register, not as a varying */ 2108 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2109 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2110 shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str); 2111 shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str); 2112 shader_hw_sample(ins, reg1, dst_str, "TA", 0, NULL, NULL); 2113 } 2114 2115 static void pshader_hw_texreg2rgb(const struct wined3d_shader_instruction *ins) 2116 { 2117 DWORD reg1 = ins->dst[0].reg.idx[0].offset; 2118 char dst_str[50]; 2119 char src_str[50]; 2120 2121 /* Note that texreg2rg treats Tx as a temporary register, not as a varying */ 2122 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2123 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2124 shader_hw_sample(ins, reg1, dst_str, src_str, 0, NULL, NULL); 2125 } 2126 2127 static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins) 2128 { 2129 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2130 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2131 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2132 char reg_coord[40], dst_reg[50], src_reg[50]; 2133 DWORD reg_dest_code; 2134 2135 /* All versions have a destination register. 
The Tx where the texture coordinates come 2136 * from is the varying incarnation of the texture register 2137 */ 2138 reg_dest_code = dst->reg.idx[0].offset; 2139 shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg); 2140 shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg); 2141 sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code); 2142 2143 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 2144 * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register 2145 * 2146 * GL_NV_fragment_program_option could handle this in one instruction via X2D: 2147 * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw 2148 * 2149 * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that 2150 * comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV 2151 * extension. 2152 */ 2153 shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code); 2154 shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg); 2155 shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code); 2156 shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg); 2157 2158 /* with projective textures, texbem only divides the static texture coord, not the displacement, 2159 * so we can't let the GL handle this. 
2160 */ 2161 if ((priv->cur_ps_args->super.tex_transform >> reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT) 2162 & WINED3D_PSARGS_PROJECTED) 2163 { 2164 shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord); 2165 shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord); 2166 shader_addline(buffer, "ADD TA.xy, TA, TB;\n"); 2167 } else { 2168 shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord); 2169 } 2170 2171 shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", 0, NULL, NULL); 2172 2173 if (ins->handler_idx == WINED3DSIH_TEXBEML) 2174 { 2175 /* No src swizzles are allowed, so this is ok */ 2176 shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n", 2177 src_reg, reg_dest_code, reg_dest_code); 2178 shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg); 2179 } 2180 } 2181 2182 static void pshader_hw_texm3x2pad(const struct wined3d_shader_instruction *ins) 2183 { 2184 DWORD reg = ins->dst[0].reg.idx[0].offset; 2185 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2186 char src0_name[50], dst_name[50]; 2187 BOOL is_color; 2188 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 2189 2190 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2191 /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized 2192 * T<reg+1> register. 
Use this register to store the calculated vector 2193 */ 2194 tmp_reg.idx[0].offset = reg + 1; 2195 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 2196 shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 2197 } 2198 2199 static void pshader_hw_texm3x2tex(const struct wined3d_shader_instruction *ins) 2200 { 2201 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2202 DWORD flags; 2203 DWORD reg = ins->dst[0].reg.idx[0].offset; 2204 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2205 char dst_str[50]; 2206 char src0_name[50]; 2207 char dst_reg[50]; 2208 BOOL is_color; 2209 2210 /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */ 2211 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2212 2213 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2214 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2215 shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2216 flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2217 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2218 } 2219 2220 static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins) 2221 { 2222 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2223 DWORD reg = ins->dst[0].reg.idx[0].offset; 2224 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2225 char src0_name[50], dst_name[50]; 2226 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 2227 BOOL is_color; 2228 2229 /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with 2230 * incrementing ins->dst[0].register_idx numbers. 
So the pad instruction already knows the final destination 2231 * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared') 2232 */ 2233 tmp_reg.idx[0].offset = reg + 2 - tex_mx->current_row; 2234 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 2235 2236 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2237 shader_addline(buffer, "DP3 %s.%c, fragment.texcoord[%u], %s;\n", 2238 dst_name, 'x' + tex_mx->current_row, reg, src0_name); 2239 tex_mx->texcoord_w[tex_mx->current_row++] = reg; 2240 } 2241 2242 static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins) 2243 { 2244 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2245 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2246 DWORD flags; 2247 DWORD reg = ins->dst[0].reg.idx[0].offset; 2248 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2249 char dst_str[50]; 2250 char src0_name[50], dst_name[50]; 2251 BOOL is_color; 2252 2253 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2254 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2255 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 2256 2257 /* Sample the texture using the calculated coordinates */ 2258 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2259 flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2260 shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3D_PSARGS_PROJECTED ? 
TEX_PROJ : 0, NULL, NULL); 2261 tex_mx->current_row = 0; 2262 } 2263 2264 static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins) 2265 { 2266 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2267 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2268 DWORD flags; 2269 DWORD reg = ins->dst[0].reg.idx[0].offset; 2270 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2271 char dst_str[50]; 2272 char src0_name[50]; 2273 char dst_reg[50]; 2274 BOOL is_color; 2275 2276 /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all 2277 * components for temporary data storage 2278 */ 2279 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2280 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2281 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2282 2283 /* Construct the eye-ray vector from w coordinates */ 2284 shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[0]); 2285 shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[1]); 2286 shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg); 2287 2288 /* Calculate reflection vector 2289 */ 2290 shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg); 2291 /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */ 2292 shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg); 2293 shader_addline(buffer, "RCP TB.w, TB.w;\n"); 2294 shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg); 2295 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 2296 shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg); 2297 2298 /* Sample the texture using the calculated coordinates */ 2299 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2300 flags = reg < MAX_TEXTURES ? 
priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2301 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2302 tex_mx->current_row = 0; 2303 } 2304 2305 static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins) 2306 { 2307 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2308 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2309 DWORD flags; 2310 DWORD reg = ins->dst[0].reg.idx[0].offset; 2311 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2312 char dst_str[50]; 2313 char src0_name[50]; 2314 char src1_name[50]; 2315 char dst_reg[50]; 2316 BOOL is_color; 2317 2318 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2319 shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name); 2320 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2321 /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */ 2322 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2323 2324 /* Calculate reflection vector. 2325 * 2326 * dot(N, E) 2327 * dst_reg.xyz = 2 * --------- * N - E 2328 * dot(N, N) 2329 * 2330 * Which normalizes the normal vector 2331 */ 2332 shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name); 2333 shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg); 2334 shader_addline(buffer, "RCP TC.w, TC.w;\n"); 2335 shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg); 2336 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 2337 shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name); 2338 2339 /* Sample the texture using the calculated coordinates */ 2340 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2341 flags = reg < MAX_TEXTURES ? 
priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2342 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2343 tex_mx->current_row = 0; 2344 } 2345 2346 static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins) 2347 { 2348 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2349 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2350 char dst_name[50]; 2351 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2352 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2353 2354 /* texdepth has an implicit destination, the fragment depth value. It's only parameter, 2355 * which is essentially an input, is the destination register because it is the first 2356 * parameter. According to the msdn, this must be register r5, but let's keep it more flexible 2357 * here(writemasks/swizzles are not valid on texdepth) 2358 */ 2359 shader_arb_get_dst_param(ins, dst, dst_name); 2360 2361 /* According to the msdn, the source register(must be r5) is unusable after 2362 * the texdepth instruction, so we're free to modify it 2363 */ 2364 shader_addline(buffer, "MIN %s.y, %s.y, %s;\n", dst_name, dst_name, one); 2365 2366 /* How to deal with the special case dst_name.g == 0? if r != 0, then 2367 * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct 2368 * result. But if r = 0.0, then 0 * inf = 0, which is incorrect. 
2369 */ 2370 shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name); 2371 shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name); 2372 shader_addline(buffer, "MIN TA.x, TA.x, %s;\n", one); 2373 shader_addline(buffer, "MAX result.depth, TA.x, %s;\n", zero); 2374 } 2375 2376 /** Process the WINED3DSIO_TEXDP3TEX instruction in ARB: 2377 * Take a 3-component dot product of the TexCoord[dstreg] and src, 2378 * then perform a 1D texture lookup from stage dstregnum, place into dst. */ 2379 static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins) 2380 { 2381 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2382 DWORD sampler_idx = ins->dst[0].reg.idx[0].offset; 2383 char src0[50]; 2384 char dst_str[50]; 2385 2386 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2387 shader_addline(buffer, "MOV TB, 0.0;\n"); 2388 shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0); 2389 2390 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2391 shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL); 2392 } 2393 2394 /** Process the WINED3DSIO_TEXDP3 instruction in ARB: 2395 * Take a 3-component dot product of the TexCoord[dstreg] and src. 
*/ 2396 static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins) 2397 { 2398 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2399 char src0[50]; 2400 char dst_str[50]; 2401 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2402 2403 /* Handle output register */ 2404 shader_arb_get_dst_param(ins, dst, dst_str); 2405 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2406 shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx[0].offset, src0); 2407 } 2408 2409 /** Process the WINED3DSIO_TEXM3X3 instruction in ARB 2410 * Perform the 3rd row of a 3x3 matrix multiply */ 2411 static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins) 2412 { 2413 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2414 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2415 char dst_str[50], dst_name[50]; 2416 char src0[50]; 2417 BOOL is_color; 2418 2419 shader_arb_get_dst_param(ins, dst, dst_str); 2420 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2421 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2422 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0); 2423 shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name); 2424 } 2425 2426 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB: 2427 * Last row of a 3x2 matrix multiply, use the result to calculate the depth: 2428 * Calculate tmp0.y = TexCoord[dstreg] . src.xyz; (tmp0.x has already been calculated) 2429 * depth = (tmp0.y == 0.0) ? 
1.0 : tmp0.x / tmp0.y
 */
static void pshader_hw_texm3x2depth(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *out = ins->ctx->buffer;
    const struct wined3d_shader_dst_param *dst_param = &ins->dst[0];
    char src_operand[50], reg_name[50];
    BOOL is_color;
    const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
    const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);

    shader_arb_get_src_param(ins, &ins->src[0], 0, src_operand);
    shader_arb_get_register_name(ins, &ins->dst[0].reg, reg_name, &is_color);

    /* Second matrix row; the .x component is expected to be filled in already. */
    shader_addline(out, "DP3 %s.y, fragment.texcoord[%u], %s;\n",
            reg_name, dst_param->reg.idx[0].offset, src_operand);

    /* Open question: the case where the .y component is zero. With x != 0,
     * x * (1 / 0) yields infinity, which the clamp below turns into the
     * correct 1.0. With x == 0 however, 0 * inf = 0, which is incorrect.
     */
    shader_addline(out, "RCP %s.y, %s.y;\n", reg_name, reg_name);
    shader_addline(out, "MUL %s.x, %s.x, %s.y;\n", reg_name, reg_name, reg_name);
    /* Clamp the quotient to [0, 1] while writing the fragment depth. */
    shader_addline(out, "MIN %s.x, %s.x, %s;\n", reg_name, reg_name, one);
    shader_addline(out, "MAX result.depth, %s.x, %s;\n", reg_name, zero);
}

/** Handles transforming all WINED3DSIO_M?x?
opcodes for 2455 Vertex/Pixel shaders to ARB_vertex_program codes */ 2456 static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins) 2457 { 2458 int i; 2459 int nComponents = 0; 2460 struct wined3d_shader_dst_param tmp_dst = {{0}}; 2461 struct wined3d_shader_src_param tmp_src[2] = {{{0}}}; 2462 struct wined3d_shader_instruction tmp_ins; 2463 2464 memset(&tmp_ins, 0, sizeof(tmp_ins)); 2465 2466 /* Set constants for the temporary argument */ 2467 tmp_ins.ctx = ins->ctx; 2468 tmp_ins.dst_count = 1; 2469 tmp_ins.dst = &tmp_dst; 2470 tmp_ins.src_count = 2; 2471 tmp_ins.src = tmp_src; 2472 2473 switch(ins->handler_idx) 2474 { 2475 case WINED3DSIH_M4x4: 2476 nComponents = 4; 2477 tmp_ins.handler_idx = WINED3DSIH_DP4; 2478 break; 2479 case WINED3DSIH_M4x3: 2480 nComponents = 3; 2481 tmp_ins.handler_idx = WINED3DSIH_DP4; 2482 break; 2483 case WINED3DSIH_M3x4: 2484 nComponents = 4; 2485 tmp_ins.handler_idx = WINED3DSIH_DP3; 2486 break; 2487 case WINED3DSIH_M3x3: 2488 nComponents = 3; 2489 tmp_ins.handler_idx = WINED3DSIH_DP3; 2490 break; 2491 case WINED3DSIH_M3x2: 2492 nComponents = 2; 2493 tmp_ins.handler_idx = WINED3DSIH_DP3; 2494 break; 2495 default: 2496 FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 2497 break; 2498 } 2499 2500 tmp_dst = ins->dst[0]; 2501 tmp_src[0] = ins->src[0]; 2502 tmp_src[1] = ins->src[1]; 2503 for (i = 0; i < nComponents; ++i) 2504 { 2505 tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i; 2506 shader_hw_map2gl(&tmp_ins); 2507 ++tmp_src[1].reg.idx[0].offset; 2508 } 2509 } 2510 2511 static DWORD abs_modifier(DWORD mod, BOOL *need_abs) 2512 { 2513 *need_abs = FALSE; 2514 2515 switch(mod) 2516 { 2517 case WINED3DSPSM_NONE: return WINED3DSPSM_ABS; 2518 case WINED3DSPSM_NEG: return WINED3DSPSM_ABS; 2519 case WINED3DSPSM_BIAS: *need_abs = TRUE; return WINED3DSPSM_BIAS; 2520 case WINED3DSPSM_BIASNEG: *need_abs = TRUE; return WINED3DSPSM_BIASNEG; 2521 case WINED3DSPSM_SIGN: *need_abs = TRUE; return 
WINED3DSPSM_SIGN; 2522 case WINED3DSPSM_SIGNNEG: *need_abs = TRUE; return WINED3DSPSM_SIGNNEG; 2523 case WINED3DSPSM_COMP: *need_abs = TRUE; return WINED3DSPSM_COMP; 2524 case WINED3DSPSM_X2: *need_abs = TRUE; return WINED3DSPSM_X2; 2525 case WINED3DSPSM_X2NEG: *need_abs = TRUE; return WINED3DSPSM_X2NEG; 2526 case WINED3DSPSM_DZ: *need_abs = TRUE; return WINED3DSPSM_DZ; 2527 case WINED3DSPSM_DW: *need_abs = TRUE; return WINED3DSPSM_DW; 2528 case WINED3DSPSM_ABS: return WINED3DSPSM_ABS; 2529 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 2530 } 2531 FIXME("Unknown modifier %u\n", mod); 2532 return mod; 2533 } 2534 2535 static void shader_hw_scalar_op(const struct wined3d_shader_instruction *ins) 2536 { 2537 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2538 const char *instruction; 2539 struct wined3d_shader_src_param src0_copy = ins->src[0]; 2540 BOOL need_abs = FALSE; 2541 2542 char dst[50]; 2543 char src[50]; 2544 2545 switch(ins->handler_idx) 2546 { 2547 case WINED3DSIH_RSQ: instruction = "RSQ"; break; 2548 case WINED3DSIH_RCP: instruction = "RCP"; break; 2549 case WINED3DSIH_EXPP: 2550 if (ins->ctx->reg_maps->shader_version.major < 2) 2551 { 2552 instruction = "EXP"; 2553 break; 2554 } 2555 /* Drop through. */ 2556 case WINED3DSIH_EXP: 2557 instruction = "EX2"; 2558 break; 2559 case WINED3DSIH_LOG: 2560 case WINED3DSIH_LOGP: 2561 /* The precision requirements suggest that LOGP matches ARBvp's LOG 2562 * instruction, but notice that the output of those instructions is 2563 * different. */ 2564 src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs); 2565 instruction = "LG2"; 2566 break; 2567 default: instruction = ""; 2568 FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 2569 break; 2570 } 2571 2572 /* Dx sdk says .x is used if no swizzle is given, but our test shows that 2573 * .w is used. 
*/ 2574 src0_copy.swizzle = shader_arb_select_component(src0_copy.swizzle, 3); 2575 2576 shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */ 2577 shader_arb_get_src_param(ins, &src0_copy, 0, src); 2578 2579 if(need_abs) 2580 { 2581 shader_addline(buffer, "ABS TA.w, %s;\n", src); 2582 shader_addline(buffer, "%s%s %s, TA.w;\n", instruction, shader_arb_get_modifier(ins), dst); 2583 } 2584 else 2585 { 2586 shader_addline(buffer, "%s%s %s, %s;\n", instruction, shader_arb_get_modifier(ins), dst, src); 2587 } 2588 2589 } 2590 2591 static void shader_hw_nrm(const struct wined3d_shader_instruction *ins) 2592 { 2593 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2594 char dst_name[50]; 2595 char src_name[50]; 2596 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2597 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 2598 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2599 2600 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2601 shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name); 2602 2603 /* In D3D, NRM of a vector with length zero returns zero. 
Catch this situation, as 2604 * otherwise NRM or RSQ would return NaN */ 2605 if(pshader && priv->target_version >= NV3) 2606 { 2607 /* GL_NV_fragment_program2's NRM needs protection against length zero vectors too 2608 * 2609 * TODO: Find out if DP3+NRM+MOV is really faster than DP3+RSQ+MUL 2610 */ 2611 shader_addline(buffer, "DP3C TA, %s, %s;\n", src_name, src_name); 2612 shader_addline(buffer, "NRM%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name); 2613 shader_addline(buffer, "MOV %s (EQ), %s;\n", dst_name, zero); 2614 } 2615 else if(priv->target_version >= NV2) 2616 { 2617 shader_addline(buffer, "DP3C TA.x, %s, %s;\n", src_name, src_name); 2618 shader_addline(buffer, "RSQ TA.x (NE), TA.x;\n"); 2619 shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name, 2620 src_name); 2621 } 2622 else 2623 { 2624 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2625 2626 shader_addline(buffer, "DP3 TA.x, %s, %s;\n", src_name, src_name); 2627 /* Pass any non-zero value to RSQ if the input vector has a length of zero. The 2628 * RSQ result doesn't matter, as long as multiplying it by 0 returns 0. 
2629 */ 2630 shader_addline(buffer, "SGE TA.y, -TA.x, %s;\n", zero); 2631 shader_addline(buffer, "MAD TA.x, %s, TA.y, TA.x;\n", one); 2632 2633 shader_addline(buffer, "RSQ TA.x, TA.x;\n"); 2634 /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/ 2635 shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name, 2636 src_name); 2637 } 2638 } 2639 2640 static void shader_hw_lrp(const struct wined3d_shader_instruction *ins) 2641 { 2642 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2643 char dst_name[50]; 2644 char src_name[3][50]; 2645 2646 /* ARB_fragment_program has a convenient LRP instruction */ 2647 if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) { 2648 shader_hw_map2gl(ins); 2649 return; 2650 } 2651 2652 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2653 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 2654 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 2655 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 2656 2657 shader_addline(buffer, "SUB TA, %s, %s;\n", src_name[1], src_name[2]); 2658 shader_addline(buffer, "MAD%s %s, %s, TA, %s;\n", shader_arb_get_modifier(ins), 2659 dst_name, src_name[0], src_name[2]); 2660 } 2661 2662 static void shader_hw_sincos(const struct wined3d_shader_instruction *ins) 2663 { 2664 /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which 2665 * must contain fixed constants. 
So we need a separate function to filter those constants and 2666 * can't use map2gl 2667 */ 2668 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2669 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2670 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2671 char dst_name[50]; 2672 char src_name0[50], src_name1[50], src_name2[50]; 2673 BOOL is_color; 2674 2675 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 2676 if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) { 2677 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2678 /* No modifiers are supported on SCS */ 2679 shader_addline(buffer, "SCS %s, %s;\n", dst_name, src_name0); 2680 2681 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) 2682 { 2683 shader_arb_get_register_name(ins, &dst->reg, src_name0, &is_color); 2684 shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name0); 2685 } 2686 } else if(priv->target_version >= NV2) { 2687 shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color); 2688 2689 /* Sincos writemask must be .x, .y or .xy */ 2690 if(dst->write_mask & WINED3DSP_WRITEMASK_0) 2691 shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0); 2692 if(dst->write_mask & WINED3DSP_WRITEMASK_1) 2693 shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0); 2694 } else { 2695 /* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8 2696 * helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2. 2697 * 2698 * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... 2699 * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ... 
2700 * 2701 * The constants we get are: 2702 * 2703 * +1 +1, -1 -1 +1 +1 -1 -1 2704 * ---- , ---- , ---- , ----- , ----- , ----- , ------ 2705 * 1!*2 2!*4 3!*8 4!*16 5!*32 6!*64 7!*128 2706 * 2707 * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2): 2708 * 2709 * (x/2)^2 = x^2 / 4 2710 * (x/2)^3 = x^3 / 8 2711 * (x/2)^4 = x^4 / 16 2712 * (x/2)^5 = x^5 / 32 2713 * etc 2714 * 2715 * To get the final result: 2716 * sin(x) = 2 * sin(x/2) * cos(x/2) 2717 * cos(x) = cos(x/2)^2 - sin(x/2)^2 2718 * (from sin(x+y) and cos(x+y) rules) 2719 * 2720 * As per MSDN, dst.z is undefined after the operation, and so is 2721 * dst.x and dst.y if they're masked out by the writemask. Ie 2722 * sincos dst.y, src1, c0, c1 2723 * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler 2724 * vsa.exe also stops with an error if the dest register is the same register as the source 2725 * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also 2726 * indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0). 2727 */ 2728 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 2729 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2); 2730 shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color); 2731 2732 shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0); /* x ^ 2 */ 2733 shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0); /* x ^ 3 */ 2734 shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0); /* x ^ 4 */ 2735 shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0); /* x ^ 5 */ 2736 shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0); /* x ^ 6 */ 2737 shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0); /* x ^ 7 */ 2738 2739 /* sin(x/2) 2740 * 2741 * Unfortunately we don't get the constants in a DP4-capable form. 
Is there a way to 2742 * properly merge that with MULs in the code above? 2743 * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe 2744 * we can merge the sine and cosine MAD rows to calculate them together. 2745 */ 2746 shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */ 2747 shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */ 2748 shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */ 2749 shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */ 2750 2751 /* cos(x/2) */ 2752 shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */ 2753 shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */ 2754 shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */ 2755 2756 if(dst->write_mask & WINED3DSP_WRITEMASK_0) { 2757 /* cos x */ 2758 shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n"); 2759 shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name); 2760 } 2761 if(dst->write_mask & WINED3DSP_WRITEMASK_1) { 2762 /* sin x */ 2763 shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name); 2764 shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name); 2765 } 2766 } 2767 } 2768 2769 static void shader_hw_sgn(const struct wined3d_shader_instruction *ins) 2770 { 2771 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2772 char dst_name[50]; 2773 char src_name[50]; 2774 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 2775 2776 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2777 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name); 2778 2779 /* SGN is only valid in vertex shaders */ 2780 if(ctx->target_version >= NV2) { 2781 shader_addline(buffer, "SSG%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name); 2782 return; 
2783 } 2784 2785 /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false. 2786 * if SRC < 0.0, SRC < -SRC = TRUE. If neither is true, src = 0.0 2787 */ 2788 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) { 2789 shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name); 2790 } else { 2791 /* src contains TA? Write to the dest first. This won't overwrite our destination. 2792 * Then use TA, and calculate the final result 2793 * 2794 * Not reading from TA? Store the first result in TA to avoid overwriting the 2795 * destination if src reg = dst reg 2796 */ 2797 if(strstr(src_name, "TA")) 2798 { 2799 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 2800 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 2801 shader_addline(buffer, "ADD %s, %s, -TA;\n", dst_name, dst_name); 2802 } 2803 else 2804 { 2805 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 2806 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 2807 shader_addline(buffer, "ADD %s, TA, -%s;\n", dst_name, dst_name); 2808 } 2809 } 2810 } 2811 2812 static void shader_hw_dsy(const struct wined3d_shader_instruction *ins) 2813 { 2814 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2815 char src[50]; 2816 char dst[50]; 2817 char dst_name[50]; 2818 BOOL is_color; 2819 2820 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 2821 shader_arb_get_src_param(ins, &ins->src[0], 0, src); 2822 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2823 2824 shader_addline(buffer, "DDY %s, %s;\n", dst, src); 2825 shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst, dst_name); 2826 } 2827 2828 static void shader_hw_pow(const struct wined3d_shader_instruction *ins) 2829 { 2830 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2831 char src0[50], src1[50], dst[50]; 2832 struct wined3d_shader_src_param src0_copy = ins->src[0]; 2833 BOOL need_abs = FALSE; 2834 
struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2835 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2836 2837 /* POW operates on the absolute value of the input */ 2838 src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs); 2839 2840 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 2841 shader_arb_get_src_param(ins, &src0_copy, 0, src0); 2842 shader_arb_get_src_param(ins, &ins->src[1], 1, src1); 2843 2844 if (need_abs) 2845 shader_addline(buffer, "ABS TA.x, %s;\n", src0); 2846 else 2847 shader_addline(buffer, "MOV TA.x, %s;\n", src0); 2848 2849 if (priv->target_version >= NV2) 2850 { 2851 shader_addline(buffer, "MOVC TA.y, %s;\n", src1); 2852 shader_addline(buffer, "POW%s %s, TA.x, TA.y;\n", shader_arb_get_modifier(ins), dst); 2853 shader_addline(buffer, "MOV %s (EQ.y), %s;\n", dst, one); 2854 } 2855 else 2856 { 2857 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2858 const char *flt_eps = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_EPS); 2859 2860 shader_addline(buffer, "ABS TA.y, %s;\n", src1); 2861 shader_addline(buffer, "SGE TA.y, -TA.y, %s;\n", zero); 2862 /* Possibly add flt_eps to avoid getting float special values */ 2863 shader_addline(buffer, "MAD TA.z, TA.y, %s, %s;\n", flt_eps, src1); 2864 shader_addline(buffer, "POW%s TA.x, TA.x, TA.z;\n", shader_arb_get_modifier(ins)); 2865 shader_addline(buffer, "MAD TA.x, -TA.x, TA.y, TA.x;\n"); 2866 shader_addline(buffer, "MAD %s, TA.y, %s, TA.x;\n", dst, one); 2867 } 2868 } 2869 2870 static void shader_hw_loop(const struct wined3d_shader_instruction *ins) 2871 { 2872 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2873 char src_name[50]; 2874 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2875 2876 /* src0 is aL */ 2877 shader_arb_get_src_param(ins, &ins->src[1], 0, src_name); 2878 2879 if(vshader) 2880 { 2881 struct shader_arb_ctx_priv 
*priv = ins->ctx->backend_data; 2882 struct list *e = list_head(&priv->control_frames); 2883 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2884 2885 if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 2886 /* The constant loader makes sure to load -1 into iX.w */ 2887 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 2888 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 2889 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 2890 } 2891 else 2892 { 2893 shader_addline(buffer, "LOOP %s;\n", src_name); 2894 } 2895 } 2896 2897 static void shader_hw_rep(const struct wined3d_shader_instruction *ins) 2898 { 2899 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2900 char src_name[50]; 2901 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2902 2903 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name); 2904 2905 /* The constant loader makes sure to load -1 into iX.w */ 2906 if(vshader) 2907 { 2908 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2909 struct list *e = list_head(&priv->control_frames); 2910 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2911 2912 if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 2913 2914 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 2915 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 2916 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 2917 } 2918 else 2919 { 2920 shader_addline(buffer, "REP %s;\n", src_name); 2921 } 2922 } 2923 2924 static void shader_hw_endloop(const struct wined3d_shader_instruction *ins) 2925 { 2926 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2927 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2928 2929 if(vshader) 2930 { 2931 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2932 struct list *e = 
list_head(&priv->control_frames); 2933 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2934 2935 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 2936 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 2937 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 2938 2939 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 2940 } 2941 else 2942 { 2943 shader_addline(buffer, "ENDLOOP;\n"); 2944 } 2945 } 2946 2947 static void shader_hw_endrep(const struct wined3d_shader_instruction *ins) 2948 { 2949 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2950 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2951 2952 if(vshader) 2953 { 2954 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2955 struct list *e = list_head(&priv->control_frames); 2956 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2957 2958 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 2959 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 2960 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 2961 2962 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 2963 } 2964 else 2965 { 2966 shader_addline(buffer, "ENDREP;\n"); 2967 } 2968 } 2969 2970 static const struct control_frame *find_last_loop(const struct shader_arb_ctx_priv *priv) 2971 { 2972 struct control_frame *control_frame; 2973 2974 LIST_FOR_EACH_ENTRY(control_frame, &priv->control_frames, struct control_frame, entry) 2975 { 2976 if(control_frame->type == LOOP || control_frame->type == REP) return control_frame; 2977 } 2978 ERR("Could not find loop for break\n"); 2979 return NULL; 2980 } 2981 2982 static void shader_hw_break(const struct wined3d_shader_instruction *ins) 2983 { 2984 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2985 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 2986 BOOL 
vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2987 2988 if(vshader) 2989 { 2990 shader_addline(buffer, "BRA loop_%u_end;\n", control_frame->no.loop); 2991 } 2992 else 2993 { 2994 shader_addline(buffer, "BRK;\n"); 2995 } 2996 } 2997 2998 static const char *get_compare(enum wined3d_shader_rel_op op) 2999 { 3000 switch (op) 3001 { 3002 case WINED3D_SHADER_REL_OP_GT: return "GT"; 3003 case WINED3D_SHADER_REL_OP_EQ: return "EQ"; 3004 case WINED3D_SHADER_REL_OP_GE: return "GE"; 3005 case WINED3D_SHADER_REL_OP_LT: return "LT"; 3006 case WINED3D_SHADER_REL_OP_NE: return "NE"; 3007 case WINED3D_SHADER_REL_OP_LE: return "LE"; 3008 default: 3009 FIXME("Unrecognized operator %#x.\n", op); 3010 return "(\?\?)"; 3011 } 3012 } 3013 3014 static enum wined3d_shader_rel_op invert_compare(enum wined3d_shader_rel_op op) 3015 { 3016 switch (op) 3017 { 3018 case WINED3D_SHADER_REL_OP_GT: return WINED3D_SHADER_REL_OP_LE; 3019 case WINED3D_SHADER_REL_OP_EQ: return WINED3D_SHADER_REL_OP_NE; 3020 case WINED3D_SHADER_REL_OP_GE: return WINED3D_SHADER_REL_OP_LT; 3021 case WINED3D_SHADER_REL_OP_LT: return WINED3D_SHADER_REL_OP_GE; 3022 case WINED3D_SHADER_REL_OP_NE: return WINED3D_SHADER_REL_OP_EQ; 3023 case WINED3D_SHADER_REL_OP_LE: return WINED3D_SHADER_REL_OP_GT; 3024 default: 3025 FIXME("Unrecognized operator %#x.\n", op); 3026 return -1; 3027 } 3028 } 3029 3030 static void shader_hw_breakc(const struct wined3d_shader_instruction *ins) 3031 { 3032 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3033 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3034 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 3035 char src_name0[50]; 3036 char src_name1[50]; 3037 const char *comp = get_compare(ins->flags); 3038 3039 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 3040 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 3041 3042 if(vshader) 3043 { 3044 /* SUBC CC, src0, 
src1" works only in pixel shaders, so use TA to throw 3045 * away the subtraction result 3046 */ 3047 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3048 shader_addline(buffer, "BRA loop_%u_end (%s.x);\n", control_frame->no.loop, comp); 3049 } 3050 else 3051 { 3052 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3053 shader_addline(buffer, "BRK (%s.x);\n", comp); 3054 } 3055 } 3056 3057 static void shader_hw_ifc(const struct wined3d_shader_instruction *ins) 3058 { 3059 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3060 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3061 struct list *e = list_head(&priv->control_frames); 3062 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3063 const char *comp; 3064 char src_name0[50]; 3065 char src_name1[50]; 3066 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3067 3068 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 3069 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 3070 3071 if(vshader) 3072 { 3073 /* Invert the flag. 
We jump to the else label if the condition is NOT true */ 3074 comp = get_compare(invert_compare(ins->flags)); 3075 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3076 shader_addline(buffer, "BRA ifc_%u_else (%s.x);\n", control_frame->no.ifc, comp); 3077 } 3078 else 3079 { 3080 comp = get_compare(ins->flags); 3081 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3082 shader_addline(buffer, "IF %s.x;\n", comp); 3083 } 3084 } 3085 3086 static void shader_hw_else(const struct wined3d_shader_instruction *ins) 3087 { 3088 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3089 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3090 struct list *e = list_head(&priv->control_frames); 3091 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3092 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3093 3094 if(vshader) 3095 { 3096 shader_addline(buffer, "BRA ifc_%u_endif;\n", control_frame->no.ifc); 3097 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 3098 control_frame->had_else = TRUE; 3099 } 3100 else 3101 { 3102 shader_addline(buffer, "ELSE;\n"); 3103 } 3104 } 3105 3106 static void shader_hw_endif(const struct wined3d_shader_instruction *ins) 3107 { 3108 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3109 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3110 struct list *e = list_head(&priv->control_frames); 3111 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3112 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3113 3114 if(vshader) 3115 { 3116 if(control_frame->had_else) 3117 { 3118 shader_addline(buffer, "ifc_%u_endif:\n", control_frame->no.ifc); 3119 } 3120 else 3121 { 3122 shader_addline(buffer, "#No else branch. 
else is endif\n"); 3123 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 3124 } 3125 } 3126 else 3127 { 3128 shader_addline(buffer, "ENDIF;\n"); 3129 } 3130 } 3131 3132 static void shader_hw_texldd(const struct wined3d_shader_instruction *ins) 3133 { 3134 DWORD sampler_idx = ins->src[1].reg.idx[0].offset; 3135 char reg_dest[40]; 3136 char reg_src[3][40]; 3137 WORD flags = TEX_DERIV; 3138 3139 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 3140 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src[0]); 3141 shader_arb_get_src_param(ins, &ins->src[2], 1, reg_src[1]); 3142 shader_arb_get_src_param(ins, &ins->src[3], 2, reg_src[2]); 3143 3144 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 3145 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 3146 3147 shader_hw_sample(ins, sampler_idx, reg_dest, reg_src[0], flags, reg_src[1], reg_src[2]); 3148 } 3149 3150 static void shader_hw_texldl(const struct wined3d_shader_instruction *ins) 3151 { 3152 DWORD sampler_idx = ins->src[1].reg.idx[0].offset; 3153 char reg_dest[40]; 3154 char reg_coord[40]; 3155 WORD flags = TEX_LOD; 3156 3157 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 3158 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_coord); 3159 3160 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 3161 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 3162 3163 shader_hw_sample(ins, sampler_idx, reg_dest, reg_coord, flags, NULL, NULL); 3164 } 3165 3166 static void shader_hw_label(const struct wined3d_shader_instruction *ins) 3167 { 3168 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3169 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3170 3171 priv->in_main_func = FALSE; 3172 /* Call instructions activate the NV extensions, not labels and rets. 
If there is an uncalled 3173 * subroutine, don't generate a label that will make GL complain 3174 */ 3175 if(priv->target_version == ARB) return; 3176 3177 shader_addline(buffer, "l%u:\n", ins->src[0].reg.idx[0].offset); 3178 } 3179 3180 static void vshader_add_footer(struct shader_arb_ctx_priv *priv_ctx, 3181 const struct arb_vshader_private *shader_data, const struct arb_vs_compile_args *args, 3182 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info, 3183 struct wined3d_string_buffer *buffer) 3184 { 3185 unsigned int i; 3186 3187 /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used 3188 * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE), 3189 * the fog frag coord is thrown away. If the fog frag coord is used, but not written by 3190 * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0) 3191 */ 3192 if (args->super.fog_src == VS_FOG_Z) 3193 { 3194 shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n"); 3195 } 3196 else 3197 { 3198 if (!reg_maps->fog) 3199 { 3200 /* posFixup.x is always 1.0, so we can safely use it */ 3201 shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n"); 3202 } 3203 else 3204 { 3205 /* Clamp fogcoord */ 3206 const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO); 3207 const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE); 3208 3209 shader_addline(buffer, "MIN TMP_FOGCOORD.x, TMP_FOGCOORD.x, %s;\n", one); 3210 shader_addline(buffer, "MAX result.fogcoord.x, TMP_FOGCOORD.x, %s;\n", zero); 3211 } 3212 } 3213 3214 /* Clipplanes are always stored without y inversion */ 3215 if (use_nv_clip(gl_info) && priv_ctx->target_version >= NV2) 3216 { 3217 if (args->super.clip_enabled) 3218 { 3219 for (i = 0; i < priv_ctx->vs_clipplanes; i++) 3220 { 3221 shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i); 3222 } 3223 } 
3224 } 3225 else if (args->clip.boolclip.clip_texcoord) 3226 { 3227 static const char component[4] = {'x', 'y', 'z', 'w'}; 3228 unsigned int cur_clip = 0; 3229 const char *zero = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ZERO); 3230 3231 for (i = 0; i < gl_info->limits.user_clip_distances; ++i) 3232 { 3233 if (args->clip.boolclip.clipplane_mask & (1u << i)) 3234 { 3235 shader_addline(buffer, "DP4 TA.%c, TMP_OUT, state.clip[%u].plane;\n", 3236 component[cur_clip++], i); 3237 } 3238 } 3239 switch (cur_clip) 3240 { 3241 case 0: 3242 shader_addline(buffer, "MOV TA, %s;\n", zero); 3243 break; 3244 case 1: 3245 shader_addline(buffer, "MOV TA.yzw, %s;\n", zero); 3246 break; 3247 case 2: 3248 shader_addline(buffer, "MOV TA.zw, %s;\n", zero); 3249 break; 3250 case 3: 3251 shader_addline(buffer, "MOV TA.w, %s;\n", zero); 3252 break; 3253 } 3254 shader_addline(buffer, "MOV result.texcoord[%u], TA;\n", 3255 args->clip.boolclip.clip_texcoord - 1); 3256 } 3257 3258 /* Write the final position. 3259 * 3260 * OpenGL coordinates specify the center of the pixel while d3d coords specify 3261 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains 3262 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x 3263 * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that. 3264 */ 3265 if (!gl_info->supported[ARB_CLIP_CONTROL]) 3266 { 3267 shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n"); 3268 shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n"); 3269 shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n"); 3270 3271 /* Z coord [0;1]->[-1;1] mapping, see comment in 3272 * get_projection_matrix() in utils.c. 
*/ 3273 if (need_helper_const(shader_data, reg_maps, gl_info)) 3274 { 3275 const char *two = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_TWO); 3276 shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, %s, -TMP_OUT.w;\n", two); 3277 } 3278 else 3279 { 3280 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n"); 3281 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n"); 3282 } 3283 } 3284 3285 shader_addline(buffer, "MOV result.position, TMP_OUT;\n"); 3286 3287 priv_ctx->footer_written = TRUE; 3288 } 3289 3290 static void shader_hw_ret(const struct wined3d_shader_instruction *ins) 3291 { 3292 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3293 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3294 const struct wined3d_shader *shader = ins->ctx->shader; 3295 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3296 3297 if(priv->target_version == ARB) return; 3298 3299 if(vshader) 3300 { 3301 if (priv->in_main_func) vshader_add_footer(priv, shader->backend_data, 3302 priv->cur_vs_args, ins->ctx->reg_maps, ins->ctx->gl_info, buffer); 3303 } 3304 3305 shader_addline(buffer, "RET;\n"); 3306 } 3307 3308 static void shader_hw_call(const struct wined3d_shader_instruction *ins) 3309 { 3310 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3311 shader_addline(buffer, "CAL l%u;\n", ins->src[0].reg.idx[0].offset); 3312 } 3313 3314 static BOOL shader_arb_compile(const struct wined3d_gl_info *gl_info, GLenum target, const char *src) 3315 { 3316 const char *ptr, *line; 3317 GLint native, pos; 3318 3319 if (TRACE_ON(d3d_shader)) 3320 { 3321 ptr = src; 3322 while ((line = get_line(&ptr))) TRACE_(d3d_shader)(" %.*s", (int)(ptr - line), line); 3323 } 3324 3325 GL_EXTCALL(glProgramStringARB(target, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(src), src)); 3326 checkGLcall("glProgramStringARB()"); 3327 3328 if (FIXME_ON(d3d_shader)) 3329 { 3330 gl_info->gl_ops.gl.p_glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, 
&pos); 3331 if (pos != -1) 3332 { 3333 FIXME_(d3d_shader)("Program error at position %d: %s\n\n", pos, 3334 debugstr_a((const char *)gl_info->gl_ops.gl.p_glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 3335 ptr = src; 3336 while ((line = get_line(&ptr))) FIXME_(d3d_shader)(" %.*s", (int)(ptr - line), line); 3337 FIXME_(d3d_shader)("\n"); 3338 3339 return FALSE; 3340 } 3341 } 3342 3343 if (WARN_ON(d3d_perf)) 3344 { 3345 GL_EXTCALL(glGetProgramivARB(target, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 3346 checkGLcall("glGetProgramivARB()"); 3347 if (!native) 3348 WARN_(d3d_perf)("Program exceeds native resource limits.\n"); 3349 } 3350 3351 return TRUE; 3352 } 3353 3354 static void arbfp_add_sRGB_correction(struct wined3d_string_buffer *buffer, const char *fragcolor, 3355 const char *tmp1, const char *tmp2, const char *tmp3, const char *tmp4, BOOL condcode) 3356 { 3357 /* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */ 3358 3359 if(condcode) 3360 { 3361 /* Sigh. MOVC CC doesn't work, so use one of the temps as dummy dest */ 3362 shader_addline(buffer, "SUBC %s, %s.x, srgb_consts1.x;\n", tmp1, fragcolor); 3363 /* Calculate the > 0.0031308 case */ 3364 shader_addline(buffer, "POW %s.x (GE), %s.x, srgb_consts0.x;\n", fragcolor, fragcolor); 3365 shader_addline(buffer, "POW %s.y (GE), %s.y, srgb_consts0.x;\n", fragcolor, fragcolor); 3366 shader_addline(buffer, "POW %s.z (GE), %s.z, srgb_consts0.x;\n", fragcolor, fragcolor); 3367 shader_addline(buffer, "MUL %s.xyz (GE), %s, srgb_consts0.y;\n", fragcolor, fragcolor); 3368 shader_addline(buffer, "SUB %s.xyz (GE), %s, srgb_consts0.z;\n", fragcolor, fragcolor); 3369 /* Calculate the < case */ 3370 shader_addline(buffer, "MUL %s.xyz (LT), srgb_consts0.w, %s;\n", fragcolor, fragcolor); 3371 } 3372 else 3373 { 3374 /* Calculate the > 0.0031308 case */ 3375 shader_addline(buffer, "POW %s.x, %s.x, srgb_consts0.x;\n", tmp1, fragcolor); 3376 shader_addline(buffer, "POW %s.y, %s.y, srgb_consts0.x;\n", tmp1, fragcolor); 
3377 shader_addline(buffer, "POW %s.z, %s.z, srgb_consts0.x;\n", tmp1, fragcolor); 3378 shader_addline(buffer, "MUL %s, %s, srgb_consts0.y;\n", tmp1, tmp1); 3379 shader_addline(buffer, "SUB %s, %s, srgb_consts0.z;\n", tmp1, tmp1); 3380 /* Calculate the < case */ 3381 shader_addline(buffer, "MUL %s, srgb_consts0.w, %s;\n", tmp2, fragcolor); 3382 /* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */ 3383 shader_addline(buffer, "SLT %s, srgb_consts1.x, %s;\n", tmp3, fragcolor); 3384 shader_addline(buffer, "SGE %s, srgb_consts1.x, %s;\n", tmp4, fragcolor); 3385 /* Store the components > 0.0031308 in the destination */ 3386 shader_addline(buffer, "MUL %s.xyz, %s, %s;\n", fragcolor, tmp1, tmp3); 3387 /* Add the components that are < 0.0031308 */ 3388 shader_addline(buffer, "MAD %s.xyz, %s, %s, %s;\n", fragcolor, tmp2, tmp4, fragcolor); 3389 /* Move everything into result.color at once. Nvidia hardware cannot handle partial 3390 * result.color writes(.rgb first, then .a), or handle overwriting already written 3391 * components. The assembler uses a temporary register in this case, which is usually 3392 * not allocated from one of our registers that were used earlier. 3393 */ 3394 } 3395 /* [0.0;1.0] clamping. 
Not needed, this is done implicitly */ 3396 } 3397 3398 static const DWORD *find_loop_control_values(const struct wined3d_shader *shader, DWORD idx) 3399 { 3400 const struct wined3d_shader_lconst *constant; 3401 3402 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 3403 { 3404 if (constant->idx == idx) 3405 { 3406 return constant->value; 3407 } 3408 } 3409 return NULL; 3410 } 3411 3412 static void init_ps_input(const struct wined3d_shader *shader, 3413 const struct arb_ps_compile_args *args, struct shader_arb_ctx_priv *priv) 3414 { 3415 static const char * const texcoords[8] = 3416 { 3417 "fragment.texcoord[0]", "fragment.texcoord[1]", "fragment.texcoord[2]", "fragment.texcoord[3]", 3418 "fragment.texcoord[4]", "fragment.texcoord[5]", "fragment.texcoord[6]", "fragment.texcoord[7]" 3419 }; 3420 unsigned int i; 3421 const struct wined3d_shader_signature_element *input; 3422 const char *semantic_name; 3423 DWORD semantic_idx; 3424 3425 switch(args->super.vp_mode) 3426 { 3427 case pretransformed: 3428 case fixedfunction: 3429 /* The pixelshader has to collect the varyings on its own. In any case properly load 3430 * color0 and color1. In the case of pretransformed vertices also load texcoords. Set 3431 * other attribs to 0.0. 3432 * 3433 * For fixedfunction this behavior is correct, according to the tests. 
For pretransformed 3434 * we'd either need a replacement shader that can load other attribs like BINORMAL, or 3435 * load the texcoord attrib pointers to match the pixel shader signature 3436 */ 3437 for (i = 0; i < shader->input_signature.element_count; ++i) 3438 { 3439 input = &shader->input_signature.elements[i]; 3440 if (!(semantic_name = input->semantic_name)) 3441 continue; 3442 semantic_idx = input->semantic_idx; 3443 3444 if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR)) 3445 { 3446 if (!semantic_idx) 3447 priv->ps_input[input->register_idx] = "fragment.color.primary"; 3448 else if (semantic_idx == 1) 3449 priv->ps_input[input->register_idx] = "fragment.color.secondary"; 3450 else 3451 priv->ps_input[input->register_idx] = "0.0"; 3452 } 3453 else if (args->super.vp_mode == fixedfunction) 3454 { 3455 priv->ps_input[input->register_idx] = "0.0"; 3456 } 3457 else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD)) 3458 { 3459 if (semantic_idx < 8) 3460 priv->ps_input[input->register_idx] = texcoords[semantic_idx]; 3461 else 3462 priv->ps_input[input->register_idx] = "0.0"; 3463 } 3464 else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_FOG)) 3465 { 3466 if (!semantic_idx) 3467 priv->ps_input[input->register_idx] = "fragment.fogcoord"; 3468 else 3469 priv->ps_input[input->register_idx] = "0.0"; 3470 } 3471 else 3472 { 3473 priv->ps_input[input->register_idx] = "0.0"; 3474 } 3475 3476 TRACE("v%u, semantic %s%u is %s\n", input->register_idx, 3477 semantic_name, semantic_idx, priv->ps_input[input->register_idx]); 3478 } 3479 break; 3480 3481 case vertexshader: 3482 /* That one is easy. 
The vertex shaders provide v0-v7 in fragment.texcoord and v8 and v9 in 3483 * fragment.color 3484 */ 3485 for(i = 0; i < 8; i++) 3486 { 3487 priv->ps_input[i] = texcoords[i]; 3488 } 3489 priv->ps_input[8] = "fragment.color.primary"; 3490 priv->ps_input[9] = "fragment.color.secondary"; 3491 break; 3492 } 3493 } 3494 3495 static void arbfp_add_linear_fog(struct wined3d_string_buffer *buffer, 3496 const char *fragcolor, const char *tmp) 3497 { 3498 shader_addline(buffer, "SUB %s.x, state.fog.params.z, fragment.fogcoord.x;\n", tmp); 3499 shader_addline(buffer, "MUL_SAT %s.x, %s.x, state.fog.params.w;\n", tmp, tmp); 3500 shader_addline(buffer, "LRP %s.rgb, %s.x, %s, state.fog.color;\n", fragcolor, tmp, fragcolor); 3501 } 3502 3503 /* Context activation is done by the caller. */ 3504 static GLuint shader_arb_generate_pshader(const struct wined3d_shader *shader, 3505 const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer, 3506 const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled) 3507 { 3508 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 3509 GLuint retval; 3510 char fragcolor[16]; 3511 DWORD next_local = 0; 3512 struct shader_arb_ctx_priv priv_ctx; 3513 BOOL dcl_td = FALSE; 3514 BOOL want_nv_prog = FALSE; 3515 struct arb_pshader_private *shader_priv = shader->backend_data; 3516 DWORD map; 3517 BOOL custom_linear_fog = FALSE; 3518 3519 char srgbtmp[4][4]; 3520 char ftoa_tmp[17]; 3521 unsigned int i, found = 0; 3522 3523 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 3524 { 3525 if (!(map & 1) 3526 || (shader->u.ps.color0_mov && i == shader->u.ps.color0_reg) 3527 || (reg_maps->shader_version.major < 2 && !i)) 3528 continue; 3529 3530 sprintf(srgbtmp[found], "R%u", i); 3531 ++found; 3532 if (found == 4) break; 3533 } 3534 3535 switch(found) { 3536 case 0: 3537 sprintf(srgbtmp[0], "TA"); 3538 sprintf(srgbtmp[1], "TB"); 3539 sprintf(srgbtmp[2], "TC"); 3540 sprintf(srgbtmp[3], "TD"); 3541 dcl_td 
/* Generates the ARB fragment program source for "shader" into "buffer",
 * uploads it through shader_arb_compile() and returns the GL program name.
 *
 * "args" selects the variant to build (sRGB correction, fog mode, NP2 fixup,
 * clip plane emulation, vertex processing mode). "compiled" receives the
 * program.local indices assigned here: bumpenvmatconst[], luminanceconst[],
 * numbumpenvmatconsts, int_consts[], num_int_consts, ycorrection and the NP2
 * fixup info.
 *
 * Returns 0 when code generation or compilation fails.
 *
 * Context activation is done by the caller. */
static GLuint shader_arb_generate_pshader(const struct wined3d_shader *shader,
        const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer,
        const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled)
{
    const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
    GLuint retval;
    char fragcolor[16];
    DWORD next_local = 0;
    struct shader_arb_ctx_priv priv_ctx;
    BOOL dcl_td = FALSE;
    BOOL want_nv_prog = FALSE;
    struct arb_pshader_private *shader_priv = shader->backend_data;
    DWORD map;
    BOOL custom_linear_fog = FALSE;

    char srgbtmp[4][4];
    char ftoa_tmp[17];
    unsigned int i, found = 0;

    /* Pick up to four of the shader's own temporaries as scratch registers
     * for the sRGB correction. The register holding the final colour
     * (color0_reg, when color0_mov is set) and R0 of pre-2.0 shaders are
     * skipped. */
    for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
    {
        if (!(map & 1)
                || (shader->u.ps.color0_mov && i == shader->u.ps.color0_reg)
                || (reg_maps->shader_version.major < 2 && !i))
            continue;

        sprintf(srgbtmp[found], "R%u", i);
        ++found;
        if (found == 4) break;
    }

    /* Fill the remaining scratch slots with the helper temps TA..TD. TD is
     * only declared when all four slots had to be filled this way. */
    switch(found) {
        case 0:
            sprintf(srgbtmp[0], "TA");
            sprintf(srgbtmp[1], "TB");
            sprintf(srgbtmp[2], "TC");
            sprintf(srgbtmp[3], "TD");
            dcl_td = TRUE;
            break;
        case 1:
            sprintf(srgbtmp[1], "TA");
            sprintf(srgbtmp[2], "TB");
            sprintf(srgbtmp[3], "TC");
            break;
        case 2:
            sprintf(srgbtmp[2], "TA");
            sprintf(srgbtmp[3], "TB");
            break;
        case 3:
            sprintf(srgbtmp[3], "TA");
            break;
        case 4:
            break;
    }

    /* Create the hw ARB shader */
    memset(&priv_ctx, 0, sizeof(priv_ctx));
    priv_ctx.cur_ps_args = args;
    priv_ctx.compiled_fprog = compiled;
    priv_ctx.cur_np2fixup_info = &compiled->np2fixup_info;
    init_ps_input(shader, args, &priv_ctx);
    list_init(&priv_ctx.control_frames);
    priv_ctx.ps_post_process = args->super.srgb_correction;

    /* Avoid enabling NV_fragment_program* if we do not need it.
     *
     * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register,
     * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation
     * is faster than what we gain from using higher native instructions. There are some things though
     * that cannot be emulated. In that case enable the extensions.
     * If the extension is enabled, instruction handlers that support both ways will use it.
     *
     * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program.
     * So enable the best we can get.
     */
    if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0 || reg_maps->usestexldd ||
       reg_maps->usestexldl || reg_maps->usesfacing || reg_maps->usesifc || reg_maps->usescall)
    {
        want_nv_prog = TRUE;
    }

    shader_addline(buffer, "!!ARBfp1.0\n");
    if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM2])
    {
        shader_addline(buffer, "OPTION NV_fragment_program2;\n");
        priv_ctx.target_version = NV3;
    }
    else if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION])
    {
        shader_addline(buffer, "OPTION NV_fragment_program;\n");
        priv_ctx.target_version = NV2;
    } else {
        if(want_nv_prog)
        {
            /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some
             * limits properly
             */
            ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n");
            ERR("Try GLSL\n");
        }
        priv_ctx.target_version = ARB;
    }

    /* More than render target 0 is written. */
    if (reg_maps->rt_mask > 1)
    {
        shader_addline(buffer, "OPTION ARB_draw_buffers;\n");
    }

    /* Fog is only set up here for shader versions below 3. */
    if (reg_maps->shader_version.major < 3)
    {
        switch (args->super.fog)
        {
            case WINED3D_FFP_PS_FOG_OFF:
                break;
            case WINED3D_FFP_PS_FOG_LINEAR:
                if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG)
                {
                    /* Emit the linear fog blend ourselves at the end of the
                     * program instead of using the ARB_fog_linear option. */
                    custom_linear_fog = TRUE;
                    priv_ctx.ps_post_process = TRUE;
                    break;
                }
                shader_addline(buffer, "OPTION ARB_fog_linear;\n");
                break;
            case WINED3D_FFP_PS_FOG_EXP:
                shader_addline(buffer, "OPTION ARB_fog_exp;\n");
                break;
            case WINED3D_FFP_PS_FOG_EXP2:
                shader_addline(buffer, "OPTION ARB_fog_exp2;\n");
                break;
        }
    }

    /* For now always declare the temps. At least the Nvidia assembler optimizes completely
     * unused temps away(but occupies them for the whole shader if they're used once). Always
     * declaring them avoids tricky bookkeeping work
     */
    shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */
    shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */
    shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */
    if(dcl_td) shader_addline(buffer, "TEMP TD;\n"); /* Used for sRGB writing */
    shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
    shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n");
    wined3d_ftoa(eps, ftoa_tmp);
    shader_addline(buffer, "PARAM ps_helper_const = { 0.0, 1.0, %s, 0.0 };\n", ftoa_tmp);

    /* Decide which register the shader body writes its colour to. When post
     * processing (sRGB correction or custom fog) is needed, the colour is
     * kept in a temporary and moved to result.color at the very end. */
    if (reg_maps->shader_version.major < 2)
    {
        strcpy(fragcolor, "R0");
    }
    else
    {
        if (priv_ctx.ps_post_process)
        {
            if (shader->u.ps.color0_mov)
            {
                sprintf(fragcolor, "R%u", shader->u.ps.color0_reg);
            }
            else
            {
                shader_addline(buffer, "TEMP TMP_COLOR;\n");
                strcpy(fragcolor, "TMP_COLOR");
            }
        } else {
            strcpy(fragcolor, "result.color");
        }
    }

    if (args->super.srgb_correction)
    {
        shader_addline(buffer, "PARAM srgb_consts0 = ");
        shader_arb_append_imm_vec4(buffer, wined3d_srgb_const0);
        shader_addline(buffer, ";\n");
        shader_addline(buffer, "PARAM srgb_consts1 = ");
        shader_arb_append_imm_vec4(buffer, wined3d_srgb_const1);
        shader_addline(buffer, ";\n");
    }

    /* Base Declarations */
    shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info, NULL, &priv_ctx);

    /* One program.local slot per bump-mapped texture unit for the bump
     * environment matrix, plus one for the luminance parameters when the
     * unit's bit is set in reg_maps->luminanceparams. */
    for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
    {
        unsigned char bump_const;

        if (!(map & 1)) continue;

        bump_const = compiled->numbumpenvmatconsts;
        compiled->bumpenvmatconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
        compiled->bumpenvmatconst[bump_const].texunit = i;
        compiled->luminanceconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
        compiled->luminanceconst[bump_const].texunit = i;

        /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported
         * in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading
         * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped
         * textures due to conditional NP2 restrictions)
         *
         * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of
         * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants,
         * their location is shader dependent anyway and they cannot be loaded globally.
         */
        compiled->bumpenvmatconst[bump_const].const_num = next_local++;
        shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n",
                       i, compiled->bumpenvmatconst[bump_const].const_num);
        compiled->numbumpenvmatconsts = bump_const + 1;

        if (!(reg_maps->luminanceparams & (1u << i)))
            continue;

        compiled->luminanceconst[bump_const].const_num = next_local++;
        shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n",
                       i, compiled->luminanceconst[bump_const].const_num);
    }

    /* Integer constants are only declared for the NV targets. Constants the
     * shader defines itself are emitted as immediates; the others get a
     * program.local slot that is recorded in compiled->int_consts[]. */
    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
    {
        compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED;
        if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2)
        {
            const DWORD *control_values = find_loop_control_values(shader, i);

            if(control_values)
            {
                shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i,
                                control_values[0], control_values[1], control_values[2]);
            }
            else
            {
                compiled->int_consts[i] = next_local;
                compiled->num_int_consts++;
                shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++);
            }
        }
    }

    if(reg_maps->vpos || reg_maps->usesdsy)
    {
        compiled->ycorrection = next_local;
        shader_addline(buffer, "PARAM ycorrection = program.local[%u];\n", next_local++);

        if(reg_maps->vpos)
        {
            shader_addline(buffer, "TEMP vpos;\n");
            /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
             * ycorrection.y: -1.0(onscreen), 1.0(offscreen)
             * ycorrection.z: 1.0
             * ycorrection.w: 0.0
             */
            shader_addline(buffer, "MAD vpos, fragment.position, ycorrection.zyww, ycorrection.wxww;\n");
            shader_addline(buffer, "FLR vpos.xy, vpos;\n");
        }
    }
    else
    {
        compiled->ycorrection = WINED3D_CONST_NUM_UNUSED;
    }

    /* Load constants to fixup NP2 texcoords if there are still free constants left:
     * Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume
     * at most 8 (MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to
     * use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not
     * applied / activated. This will probably result in wrong rendering of the texture, but will save us from
     * shader compilation errors and the subsequent errors when drawing with this shader. */
    /* NOTE(review): the comment above speaks of local program parameters,
     * but the PARAM emitted below binds program.env[] - confirm against the
     * code that uploads these constants. */
    if (priv_ctx.cur_ps_args->super.np2_fixup) {
        unsigned char cur_fixup_sampler = 0;

        struct arb_ps_np2fixup_info* const fixup = priv_ctx.cur_np2fixup_info;
        const WORD map = priv_ctx.cur_ps_args->super.np2_fixup;
        const UINT max_lconsts = gl_info->limits.arb_ps_local_constants;

        fixup->offset = next_local;
        fixup->super.active = 0;

        for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i)
        {
            if (!(map & (1u << i)))
                continue;

            /* Two samplers share one constant, hence the shift. */
            if (fixup->offset + (cur_fixup_sampler >> 1) < max_lconsts)
            {
                fixup->super.active |= (1u << i);
                fixup->super.idx[i] = cur_fixup_sampler++;
            }
            else
            {
                FIXME("No free constant found to load NP2 fixup data into shader. "
                      "Sampling from this texture will probably look wrong.\n");
                break;
            }
        }

        fixup->super.num_consts = (cur_fixup_sampler + 1) >> 1;
        if (fixup->super.num_consts) {
            shader_addline(buffer, "PARAM np2fixup[%u] = { program.env[%u..%u] };\n",
                           fixup->super.num_consts, fixup->offset, fixup->super.num_consts + fixup->offset - 1);
        }
    }

    /* Emulated clip planes: kill the fragment based on the texcoord that was
     * reserved for clipping. */
    if (shader_priv->clipplane_emulation != ~0U && args->clip)
    {
        shader_addline(buffer, "KIL fragment.texcoord[%u];\n", shader_priv->clipplane_emulation);
    }

    /* Base Shader Body */
    if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL)))
        return 0;

    if(args->super.srgb_correction) {
        arbfp_add_sRGB_correction(buffer, fragcolor, srgbtmp[0], srgbtmp[1], srgbtmp[2], srgbtmp[3],
                                  priv_ctx.target_version >= NV2);
    }

    if (custom_linear_fog)
        arbfp_add_linear_fog(buffer, fragcolor, "TA");

    /* The colour was kept in a temporary; write it out in one go. */
    if(strcmp(fragcolor, "result.color")) {
        shader_addline(buffer, "MOV result.color, %s;\n", fragcolor);
    }
    shader_addline(buffer, "END\n");

    /* TODO: change to resource.glObjectHandle or something like that */
    GL_EXTCALL(glGenProgramsARB(1, &retval));

    TRACE("Creating a hw pixel shader, prg=%d\n", retval);
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval));

    TRACE("Created hw pixel shader, prg=%d\n", retval);
    /* NOTE(review): on failure the program name generated above is not
     * deleted here - confirm whether a caller cleans it up. */
    if (!shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer->buffer))
        return 0;

    return retval;
}
3827 TRACE("Creating a hw pixel shader, prg=%d\n", retval); 3828 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval)); 3829 3830 TRACE("Created hw pixel shader, prg=%d\n", retval); 3831 if (!shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer->buffer)) 3832 return 0; 3833 3834 return retval; 3835 } 3836 3837 static int compare_sig(const struct wined3d_shader_signature *sig1, const struct wined3d_shader_signature *sig2) 3838 { 3839 unsigned int i; 3840 int ret; 3841 3842 if (sig1->element_count != sig2->element_count) 3843 return sig1->element_count < sig2->element_count ? -1 : 1; 3844 3845 for (i = 0; i < sig1->element_count; ++i) 3846 { 3847 const struct wined3d_shader_signature_element *e1, *e2; 3848 3849 e1 = &sig1->elements[i]; 3850 e2 = &sig2->elements[i]; 3851 3852 if (!e1->semantic_name || !e2->semantic_name) 3853 { 3854 /* Compare pointers, not contents. One string is NULL (element 3855 * does not exist), the other one is not NULL. */ 3856 if (e1->semantic_name != e2->semantic_name) 3857 return e1->semantic_name < e2->semantic_name ? -1 : 1; 3858 continue; 3859 } 3860 3861 if ((ret = strcmp(e1->semantic_name, e2->semantic_name))) 3862 return ret; 3863 if (e1->semantic_idx != e2->semantic_idx) 3864 return e1->semantic_idx < e2->semantic_idx ? -1 : 1; 3865 if (e1->sysval_semantic != e2->sysval_semantic) 3866 return e1->sysval_semantic < e2->sysval_semantic ? -1 : 1; 3867 if (e1->component_type != e2->component_type) 3868 return e1->component_type < e2->component_type ? -1 : 1; 3869 if (e1->register_idx != e2->register_idx) 3870 return e1->register_idx < e2->register_idx ? -1 : 1; 3871 if (e1->mask != e2->mask) 3872 return e1->mask < e2->mask ? 
-1 : 1; 3873 } 3874 return 0; 3875 } 3876 3877 static void clone_sig(struct wined3d_shader_signature *new, const struct wined3d_shader_signature *sig) 3878 { 3879 unsigned int i; 3880 char *name; 3881 3882 new->element_count = sig->element_count; 3883 new->elements = heap_calloc(new->element_count, sizeof(*new->elements)); 3884 for (i = 0; i < sig->element_count; ++i) 3885 { 3886 new->elements[i] = sig->elements[i]; 3887 3888 if (!new->elements[i].semantic_name) 3889 continue; 3890 3891 /* Clone the semantic string */ 3892 name = heap_alloc(strlen(sig->elements[i].semantic_name) + 1); 3893 strcpy(name, sig->elements[i].semantic_name); 3894 new->elements[i].semantic_name = name; 3895 } 3896 } 3897 3898 static DWORD find_input_signature(struct shader_arb_priv *priv, const struct wined3d_shader_signature *sig) 3899 { 3900 struct wine_rb_entry *entry = wine_rb_get(&priv->signature_tree, sig); 3901 struct ps_signature *found_sig; 3902 3903 if (entry) 3904 { 3905 found_sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 3906 TRACE("Found existing signature %u\n", found_sig->idx); 3907 return found_sig->idx; 3908 } 3909 found_sig = heap_alloc_zero(sizeof(*found_sig)); 3910 clone_sig(&found_sig->sig, sig); 3911 found_sig->idx = priv->ps_sig_number++; 3912 TRACE("New signature stored and assigned number %u\n", found_sig->idx); 3913 if(wine_rb_put(&priv->signature_tree, sig, &found_sig->entry) == -1) 3914 { 3915 ERR("Failed to insert program entry.\n"); 3916 } 3917 return found_sig->idx; 3918 } 3919 3920 static void init_output_registers(const struct wined3d_shader *shader, 3921 const struct wined3d_shader_signature *ps_input_sig, 3922 struct shader_arb_ctx_priv *priv_ctx, struct arb_vs_compiled_shader *compiled) 3923 { 3924 unsigned int i, j; 3925 static const char * const texcoords[8] = 3926 { 3927 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 3928 "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", 
"result.texcoord[7]" 3929 }; 3930 /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary 3931 * and varying 9 to result.color.secondary 3932 */ 3933 static const char * const decl_idx_to_string[MAX_REG_INPUT] = 3934 { 3935 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 3936 "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]", 3937 "result.color.primary", "result.color.secondary" 3938 }; 3939 3940 if (!ps_input_sig) 3941 { 3942 TRACE("Pixel shader uses builtin varyings\n"); 3943 /* Map builtins to builtins */ 3944 for(i = 0; i < 8; i++) 3945 { 3946 priv_ctx->texcrd_output[i] = texcoords[i]; 3947 } 3948 priv_ctx->color_output[0] = "result.color.primary"; 3949 priv_ctx->color_output[1] = "result.color.secondary"; 3950 priv_ctx->fog_output = "TMP_FOGCOORD"; 3951 3952 /* Map declared regs to builtins. Use "TA" to /dev/null unread output */ 3953 for (i = 0; i < shader->output_signature.element_count; ++i) 3954 { 3955 const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i]; 3956 3957 if (!output->semantic_name) 3958 continue; 3959 3960 if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION)) 3961 { 3962 TRACE("o%u is TMP_OUT\n", output->register_idx); 3963 if (!output->semantic_idx) 3964 priv_ctx->vs_output[output->register_idx] = "TMP_OUT"; 3965 else 3966 priv_ctx->vs_output[output->register_idx] = "TA"; 3967 } 3968 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE)) 3969 { 3970 TRACE("o%u is result.pointsize\n", output->register_idx); 3971 if (!output->semantic_idx) 3972 priv_ctx->vs_output[output->register_idx] = "result.pointsize"; 3973 else 3974 priv_ctx->vs_output[output->register_idx] = "TA"; 3975 } 3976 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_COLOR)) 3977 { 3978 TRACE("o%u is result.color.?, idx %u\n", output->register_idx, 
/* Decides which ARB result register (or scratch temp) every vertex shader
 * output is written to, and stores the register names in priv_ctx.
 *
 * When "ps_input_sig" is NULL the pixel shader reads the builtin varyings, so
 * declared outputs are mapped to the builtin result registers by semantic.
 * Otherwise outputs are matched against the pixel shader's declared inputs
 * and routed to the register that input lives in. Outputs nothing reads are
 * sent to the "TA" temp. compiled->need_color_unclamp is set whenever a
 * declared varying ends up in result.color.primary or .secondary. */
static void init_output_registers(const struct wined3d_shader *shader,
        const struct wined3d_shader_signature *ps_input_sig,
        struct shader_arb_ctx_priv *priv_ctx, struct arb_vs_compiled_shader *compiled)
{
    unsigned int i, j;
    static const char * const texcoords[8] =
    {
        "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
        "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]"
    };
    /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary
     * and varying 9 to result.color.secondary
     */
    /* Only the first ten entries are initialized; entries beyond them (if
     * MAX_REG_INPUT is larger) are NULL. */
    static const char * const decl_idx_to_string[MAX_REG_INPUT] =
    {
        "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
        "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]",
        "result.color.primary", "result.color.secondary"
    };

    if (!ps_input_sig)
    {
        TRACE("Pixel shader uses builtin varyings\n");
        /* Map builtins to builtins */
        for(i = 0; i < 8; i++)
        {
            priv_ctx->texcrd_output[i] = texcoords[i];
        }
        priv_ctx->color_output[0] = "result.color.primary";
        priv_ctx->color_output[1] = "result.color.secondary";
        priv_ctx->fog_output = "TMP_FOGCOORD";

        /* Map declared regs to builtins. Use "TA" to /dev/null unread output */
        for (i = 0; i < shader->output_signature.element_count; ++i)
        {
            const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i];

            if (!output->semantic_name)
                continue;

            /* Note that the TRACE in each branch names the builtin register
             * even when the semantic index sends the output to "TA". */
            if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION))
            {
                TRACE("o%u is TMP_OUT\n", output->register_idx);
                if (!output->semantic_idx)
                    priv_ctx->vs_output[output->register_idx] = "TMP_OUT";
                else
                    priv_ctx->vs_output[output->register_idx] = "TA";
            }
            else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE))
            {
                TRACE("o%u is result.pointsize\n", output->register_idx);
                if (!output->semantic_idx)
                    priv_ctx->vs_output[output->register_idx] = "result.pointsize";
                else
                    priv_ctx->vs_output[output->register_idx] = "TA";
            }
            else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_COLOR))
            {
                TRACE("o%u is result.color.?, idx %u\n", output->register_idx, output->semantic_idx);
                if (!output->semantic_idx)
                    priv_ctx->vs_output[output->register_idx] = "result.color.primary";
                else if (output->semantic_idx == 1)
                    priv_ctx->vs_output[output->register_idx] = "result.color.secondary";
                else priv_ctx->vs_output[output->register_idx] = "TA";
            }
            else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
            {
                TRACE("o%u is result.texcoord[%u]\n", output->register_idx, output->semantic_idx);
                if (output->semantic_idx >= 8)
                    priv_ctx->vs_output[output->register_idx] = "TA";
                else
                    priv_ctx->vs_output[output->register_idx] = texcoords[output->semantic_idx];
            }
            else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_FOG))
            {
                TRACE("o%u is result.fogcoord\n", output->register_idx);
                if (output->semantic_idx > 0)
                    priv_ctx->vs_output[output->register_idx] = "TA";
                else
                    priv_ctx->vs_output[output->register_idx] = "result.fogcoord";
            }
            else
            {
                priv_ctx->vs_output[output->register_idx] = "TA";
            }
        }
        return;
    }

    TRACE("Pixel shader uses declared varyings\n");

    /* Map builtin to declared. /dev/null the results by default to the TA temp reg */
    for(i = 0; i < 8; i++)
    {
        priv_ctx->texcrd_output[i] = "TA";
    }
    priv_ctx->color_output[0] = "TA";
    priv_ctx->color_output[1] = "TA";
    priv_ctx->fog_output = "TA";

    for (i = 0; i < ps_input_sig->element_count; ++i)
    {
        const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[i];

        if (!input->semantic_name)
            continue;

        /* If a declared input register is not written by builtin arguments, don't write to it.
         * GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D
         *
         * Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes
         * to TMP_OUT in any case
         */
        if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
        {
            if (input->semantic_idx < 8)
                priv_ctx->texcrd_output[input->semantic_idx] = decl_idx_to_string[input->register_idx];
        }
        else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_COLOR))
        {
            if (input->semantic_idx < 2)
                priv_ctx->color_output[input->semantic_idx] = decl_idx_to_string[input->register_idx];
        }
        else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_FOG))
        {
            if (!input->semantic_idx)
                priv_ctx->fog_output = decl_idx_to_string[input->register_idx];
        }
        else
        {
            continue;
        }

        /* The check looks at the register the input lives in, regardless of
         * whether the semantic index test above actually routed a builtin
         * output there. */
        if (!strcmp(decl_idx_to_string[input->register_idx], "result.color.primary")
                || !strcmp(decl_idx_to_string[input->register_idx], "result.color.secondary"))
        {
            compiled->need_color_unclamp = TRUE;
        }
    }

    /* Map declared to declared */
    for (i = 0; i < shader->output_signature.element_count; ++i)
    {
        const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i];

        /* Write unread output to TA to throw them away */
        priv_ctx->vs_output[output->register_idx] = "TA";

        if (!output->semantic_name)
            continue;

        /* Position and point size always go to their fixed destinations,
         * independent of the pixel shader signature. */
        if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION) && !output->semantic_idx)
        {
            priv_ctx->vs_output[output->register_idx] = "TMP_OUT";
            continue;
        }
        else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE) && !output->semantic_idx)
        {
            priv_ctx->vs_output[output->register_idx] = "result.pointsize";
            continue;
        }

        /* Look for a pixel shader input with the same semantic name and
         * index. The scan does not stop at the first hit, so a later match
         * overrides an earlier one. */
        for (j = 0; j < ps_input_sig->element_count; ++j)
        {
            const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[j];

            if (!input->semantic_name)
                continue;

            if (!strcmp(input->semantic_name, output->semantic_name)
                    && input->semantic_idx == output->semantic_idx)
            {
                priv_ctx->vs_output[output->register_idx] = decl_idx_to_string[input->register_idx];

                if (!strcmp(priv_ctx->vs_output[output->register_idx], "result.color.primary")
                        || !strcmp(priv_ctx->vs_output[output->register_idx], "result.color.secondary"))
                {
                    compiled->need_color_unclamp = TRUE;
                }
            }
        }
    }
}
ps_input_sig->element_count; ++j) 4083 { 4084 const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[j]; 4085 4086 if (!input->semantic_name) 4087 continue; 4088 4089 if (!strcmp(input->semantic_name, output->semantic_name) 4090 && input->semantic_idx == output->semantic_idx) 4091 { 4092 priv_ctx->vs_output[output->register_idx] = decl_idx_to_string[input->register_idx]; 4093 4094 if (!strcmp(priv_ctx->vs_output[output->register_idx], "result.color.primary") 4095 || !strcmp(priv_ctx->vs_output[output->register_idx], "result.color.secondary")) 4096 { 4097 compiled->need_color_unclamp = TRUE; 4098 } 4099 } 4100 } 4101 } 4102 } 4103 4104 /* Context activation is done by the caller. */ 4105 static GLuint shader_arb_generate_vshader(const struct wined3d_shader *shader, 4106 const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer, 4107 const struct arb_vs_compile_args *args, struct arb_vs_compiled_shader *compiled, 4108 const struct wined3d_shader_signature *ps_input_sig) 4109 { 4110 const struct arb_vshader_private *shader_data = shader->backend_data; 4111 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 4112 struct shader_arb_priv *priv = shader->device->shader_priv; 4113 GLuint ret; 4114 DWORD next_local = 0; 4115 struct shader_arb_ctx_priv priv_ctx; 4116 unsigned int i; 4117 4118 memset(&priv_ctx, 0, sizeof(priv_ctx)); 4119 priv_ctx.cur_vs_args = args; 4120 list_init(&priv_ctx.control_frames); 4121 init_output_registers(shader, ps_input_sig, &priv_ctx, compiled); 4122 4123 /* Create the hw ARB shader */ 4124 shader_addline(buffer, "!!ARBvp1.0\n"); 4125 4126 /* Always enable the NV extension if available. Unlike fragment shaders, there is no 4127 * mesurable performance penalty, and we can always make use of it for clipplanes. 
4128 */ 4129 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 4130 { 4131 shader_addline(buffer, "OPTION NV_vertex_program3;\n"); 4132 priv_ctx.target_version = NV3; 4133 shader_addline(buffer, "ADDRESS aL;\n"); 4134 } 4135 else if (gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 4136 { 4137 shader_addline(buffer, "OPTION NV_vertex_program2;\n"); 4138 priv_ctx.target_version = NV2; 4139 shader_addline(buffer, "ADDRESS aL;\n"); 4140 } else { 4141 priv_ctx.target_version = ARB; 4142 } 4143 4144 shader_addline(buffer, "TEMP TMP_OUT;\n"); 4145 if (reg_maps->fog) 4146 shader_addline(buffer, "TEMP TMP_FOGCOORD;\n"); 4147 if (need_helper_const(shader_data, reg_maps, gl_info)) 4148 { 4149 char ftoa_tmp[17]; 4150 wined3d_ftoa(eps, ftoa_tmp); 4151 shader_addline(buffer, "PARAM helper_const = { 0.0, 1.0, 2.0, %s};\n", ftoa_tmp); 4152 } 4153 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) 4154 { 4155 shader_addline(buffer, "PARAM rel_addr_const = { 0.5, %d.0, 0.0, 0.0 };\n", shader_data->rel_offset); 4156 shader_addline(buffer, "TEMP A0_SHADOW;\n"); 4157 } 4158 4159 shader_addline(buffer, "TEMP TA;\n"); 4160 shader_addline(buffer, "TEMP TB;\n"); 4161 4162 /* Base Declarations */ 4163 shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info, 4164 &priv_ctx.vs_clipplanes, &priv_ctx); 4165 4166 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 4167 { 4168 compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED; 4169 if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2) 4170 { 4171 const DWORD *control_values = find_loop_control_values(shader, i); 4172 4173 if(control_values) 4174 { 4175 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i, 4176 control_values[0], control_values[1], control_values[2]); 4177 } 4178 else 4179 { 4180 compiled->int_consts[i] = next_local; 4181 compiled->num_int_consts++; 4182 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++); 4183 } 4184 } 4185 } 4186 4187 /* We need a constant to fixup the 
final position */ 4188 shader_addline(buffer, "PARAM posFixup = program.local[%u];\n", next_local); 4189 compiled->pos_fixup = next_local++; 4190 4191 /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values 4192 * for output parameters. D3D in theory does not do that either, but some applications depend on a 4193 * proper initialization of the secondary color, and programs using the fixed function pipeline without 4194 * a replacement shader depend on the texcoord.w being set properly. 4195 * 4196 * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This 4197 * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So 4198 * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex- 4199 * coords, we have a flag in the opengl caps. Many cards do not require the texcoord being set, and 4200 * this can eat a number of instructions, so skip it unless this cap is set as well 4201 */ 4202 if (!gl_info->supported[NV_VERTEX_PROGRAM]) 4203 { 4204 const char *color_init = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_0001); 4205 shader_addline(buffer, "MOV result.color.secondary, %s;\n", color_init); 4206 4207 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W && !priv->ffp_proj_control) 4208 { 4209 int i; 4210 const char *one = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ONE); 4211 for(i = 0; i < MAX_REG_TEXCRD; i++) 4212 { 4213 if (reg_maps->u.texcoord_mask[i] && reg_maps->u.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) 4214 shader_addline(buffer, "MOV result.texcoord[%u].w, %s\n", i, one); 4215 } 4216 } 4217 } 4218 4219 /* The shader starts with the main function */ 4220 priv_ctx.in_main_func = TRUE; 4221 /* Base Shader Body */ 4222 if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL))) 4223 return -1; 4224 4225 if (!priv_ctx.footer_written) 
vshader_add_footer(&priv_ctx, 4226 shader_data, args, reg_maps, gl_info, buffer); 4227 4228 shader_addline(buffer, "END\n"); 4229 4230 /* TODO: change to resource.glObjectHandle or something like that */ 4231 GL_EXTCALL(glGenProgramsARB(1, &ret)); 4232 4233 TRACE("Creating a hw vertex shader, prg=%d\n", ret); 4234 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret)); 4235 4236 TRACE("Created hw vertex shader, prg=%d\n", ret); 4237 if (!shader_arb_compile(gl_info, GL_VERTEX_PROGRAM_ARB, buffer->buffer)) 4238 return -1; 4239 4240 return ret; 4241 } 4242 4243 /* Context activation is done by the caller. */ 4244 static struct arb_ps_compiled_shader *find_arb_pshader(struct wined3d_shader *shader, 4245 const struct arb_ps_compile_args *args) 4246 { 4247 struct wined3d_device *device = shader->device; 4248 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 4249 const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info; 4250 UINT i; 4251 DWORD new_size; 4252 struct arb_ps_compiled_shader *new_array; 4253 struct wined3d_string_buffer buffer; 4254 struct arb_pshader_private *shader_data; 4255 GLuint ret; 4256 4257 if (!shader->backend_data) 4258 { 4259 struct shader_arb_priv *priv = device->shader_priv; 4260 4261 shader->backend_data = heap_alloc_zero(sizeof(*shader_data)); 4262 shader_data = shader->backend_data; 4263 shader_data->clamp_consts = shader->reg_maps.shader_version.major == 1; 4264 4265 if (shader->reg_maps.shader_version.major < 3) 4266 shader_data->input_signature_idx = ~0U; 4267 else 4268 shader_data->input_signature_idx = find_input_signature(priv, &shader->input_signature); 4269 4270 TRACE("Shader got assigned input signature index %u\n", shader_data->input_signature_idx); 4271 4272 if (!d3d_info->vs_clipping) 4273 shader_data->clipplane_emulation = shader_find_free_input_register(&shader->reg_maps, 4274 d3d_info->limits.ffp_blend_stages - 1); 4275 else 4276 shader_data->clipplane_emulation = ~0U; 4277 } 4278 shader_data = 
shader->backend_data; 4279 4280 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 4281 * so a linear search is more performant than a hashmap or a binary search 4282 * (cache coherency etc) 4283 */ 4284 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4285 { 4286 if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args))) 4287 return &shader_data->gl_shaders[i]; 4288 } 4289 4290 TRACE("No matching GL shader found, compiling a new shader\n"); 4291 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 4292 if (shader_data->num_gl_shaders) 4293 { 4294 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 4295 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 4296 new_size * sizeof(*shader_data->gl_shaders)); 4297 } 4298 else 4299 { 4300 new_array = heap_alloc_zero(sizeof(*shader_data->gl_shaders)); 4301 new_size = 1; 4302 } 4303 4304 if(!new_array) { 4305 ERR("Out of memory\n"); 4306 return 0; 4307 } 4308 shader_data->gl_shaders = new_array; 4309 shader_data->shader_array_size = new_size; 4310 } 4311 4312 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 4313 4314 pixelshader_update_resource_types(shader, args->super.tex_types); 4315 4316 if (!string_buffer_init(&buffer)) 4317 { 4318 ERR("Failed to initialize shader buffer.\n"); 4319 return 0; 4320 } 4321 4322 ret = shader_arb_generate_pshader(shader, gl_info, &buffer, args, 4323 &shader_data->gl_shaders[shader_data->num_gl_shaders]); 4324 string_buffer_free(&buffer); 4325 shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 4326 4327 return &shader_data->gl_shaders[shader_data->num_gl_shaders++]; 4328 } 4329 4330 static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new, 4331 const DWORD use_map, BOOL skip_int) { 4332 if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE; 4333 
if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE; 4334 if(stored->super.fog_src != new->super.fog_src) return FALSE; 4335 if(stored->clip.boolclip_compare != new->clip.boolclip_compare) return FALSE; 4336 if(stored->ps_signature != new->ps_signature) return FALSE; 4337 if(stored->vertex.samplers_compare != new->vertex.samplers_compare) return FALSE; 4338 if(skip_int) return TRUE; 4339 4340 return !memcmp(stored->loop_ctrl, new->loop_ctrl, sizeof(stored->loop_ctrl)); 4341 } 4342 4343 static struct arb_vs_compiled_shader *find_arb_vshader(struct wined3d_shader *shader, 4344 const struct wined3d_gl_info *gl_info, DWORD use_map, const struct arb_vs_compile_args *args, 4345 const struct wined3d_shader_signature *ps_input_sig) 4346 { 4347 UINT i; 4348 DWORD new_size; 4349 struct arb_vs_compiled_shader *new_array; 4350 struct wined3d_string_buffer buffer; 4351 struct arb_vshader_private *shader_data; 4352 GLuint ret; 4353 4354 if (!shader->backend_data) 4355 { 4356 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 4357 4358 shader->backend_data = heap_alloc_zero(sizeof(*shader_data)); 4359 shader_data = shader->backend_data; 4360 4361 if ((gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) 4362 && reg_maps->min_rel_offset <= reg_maps->max_rel_offset) 4363 { 4364 if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 127) 4365 { 4366 FIXME("The difference between the minimum and maximum relative offset is > 127.\n"); 4367 FIXME("Which this OpenGL implementation does not support. 
Try using GLSL.\n"); 4368 FIXME("Min: %u, Max: %u.\n", reg_maps->min_rel_offset, reg_maps->max_rel_offset); 4369 } 4370 else if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 63) 4371 shader_data->rel_offset = reg_maps->min_rel_offset + 63; 4372 else if (reg_maps->max_rel_offset > 63) 4373 shader_data->rel_offset = reg_maps->min_rel_offset; 4374 } 4375 } 4376 shader_data = shader->backend_data; 4377 4378 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 4379 * so a linear search is more performant than a hashmap or a binary search 4380 * (cache coherency etc) 4381 */ 4382 for(i = 0; i < shader_data->num_gl_shaders; i++) { 4383 if (vs_args_equal(&shader_data->gl_shaders[i].args, args, 4384 use_map, gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])) 4385 { 4386 return &shader_data->gl_shaders[i]; 4387 } 4388 } 4389 4390 TRACE("No matching GL shader found, compiling a new shader\n"); 4391 4392 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 4393 if (shader_data->num_gl_shaders) 4394 { 4395 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 4396 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 4397 new_size * sizeof(*shader_data->gl_shaders)); 4398 } 4399 else 4400 { 4401 new_array = heap_alloc_zero(sizeof(*shader_data->gl_shaders)); 4402 new_size = 1; 4403 } 4404 4405 if(!new_array) { 4406 ERR("Out of memory\n"); 4407 return 0; 4408 } 4409 shader_data->gl_shaders = new_array; 4410 shader_data->shader_array_size = new_size; 4411 } 4412 4413 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 4414 4415 if (!string_buffer_init(&buffer)) 4416 { 4417 ERR("Failed to initialize shader buffer.\n"); 4418 return 0; 4419 } 4420 4421 ret = shader_arb_generate_vshader(shader, gl_info, &buffer, args, 4422 &shader_data->gl_shaders[shader_data->num_gl_shaders], 4423 ps_input_sig); 4424 string_buffer_free(&buffer); 4425 
shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 4426 4427 return &shader_data->gl_shaders[shader_data->num_gl_shaders++]; 4428 } 4429 4430 static void find_arb_ps_compile_args(const struct wined3d_state *state, 4431 const struct wined3d_context *context, const struct wined3d_shader *shader, 4432 struct arb_ps_compile_args *args) 4433 { 4434 const struct wined3d_gl_info *gl_info = context->gl_info; 4435 const struct wined3d_d3d_info *d3d_info = context->d3d_info; 4436 int i; 4437 WORD int_skip; 4438 4439 find_ps_compile_args(state, shader, context->stream_info.position_transformed, &args->super, context); 4440 4441 /* This forces all local boolean constants to 1 to make them stateblock independent */ 4442 args->bools = shader->reg_maps.local_bool_consts; 4443 4444 for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) 4445 { 4446 if (state->ps_consts_b[i]) 4447 args->bools |= ( 1u << i); 4448 } 4449 4450 /* Only enable the clip plane emulation KIL if at least one clipplane is enabled. The KIL instruction 4451 * is quite expensive because it forces the driver to disable early Z discards. 
It is cheaper to 4452 * duplicate the shader than have a no-op KIL instruction in every shader 4453 */ 4454 if (!d3d_info->vs_clipping && use_vs(state) 4455 && state->render_states[WINED3D_RS_CLIPPING] 4456 && state->render_states[WINED3D_RS_CLIPPLANEENABLE]) 4457 args->clip = 1; 4458 else 4459 args->clip = 0; 4460 4461 /* Skip if unused or local, or supported natively */ 4462 int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts; 4463 if (int_skip == 0xffff || gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]) 4464 { 4465 memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 4466 return; 4467 } 4468 4469 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 4470 { 4471 if (int_skip & (1u << i)) 4472 { 4473 args->loop_ctrl[i][0] = 0; 4474 args->loop_ctrl[i][1] = 0; 4475 args->loop_ctrl[i][2] = 0; 4476 } 4477 else 4478 { 4479 args->loop_ctrl[i][0] = state->ps_consts_i[i].x; 4480 args->loop_ctrl[i][1] = state->ps_consts_i[i].y; 4481 args->loop_ctrl[i][2] = state->ps_consts_i[i].z; 4482 } 4483 } 4484 } 4485 4486 static void find_arb_vs_compile_args(const struct wined3d_state *state, 4487 const struct wined3d_context *context, const struct wined3d_shader *shader, 4488 struct arb_vs_compile_args *args) 4489 { 4490 const struct wined3d_device *device = shader->device; 4491 const struct wined3d_adapter *adapter = device->adapter; 4492 const struct wined3d_gl_info *gl_info = context->gl_info; 4493 const struct wined3d_d3d_info *d3d_info = context->d3d_info; 4494 int i; 4495 WORD int_skip; 4496 4497 find_vs_compile_args(state, shader, context->stream_info.swizzle_map, &args->super, context); 4498 4499 args->clip.boolclip_compare = 0; 4500 if (use_ps(state)) 4501 { 4502 const struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 4503 const struct arb_pshader_private *shader_priv = ps->backend_data; 4504 args->ps_signature = shader_priv->input_signature_idx; 4505 4506 args->clip.boolclip.clip_texcoord = shader_priv->clipplane_emulation + 1; 4507 
} 4508 else 4509 { 4510 args->ps_signature = ~0; 4511 if (!d3d_info->vs_clipping && adapter->fragment_pipe == &arbfp_fragment_pipeline) 4512 args->clip.boolclip.clip_texcoord = ffp_clip_emul(context) ? d3d_info->limits.ffp_blend_stages : 0; 4513 /* Otherwise: Setting boolclip_compare set clip_texcoord to 0 */ 4514 } 4515 4516 if (args->clip.boolclip.clip_texcoord) 4517 { 4518 if (state->render_states[WINED3D_RS_CLIPPING]) 4519 args->clip.boolclip.clipplane_mask = (unsigned char)state->render_states[WINED3D_RS_CLIPPLANEENABLE]; 4520 /* clipplane_mask was set to 0 by setting boolclip_compare to 0 */ 4521 } 4522 4523 /* This forces all local boolean constants to 1 to make them stateblock independent */ 4524 args->clip.boolclip.bools = shader->reg_maps.local_bool_consts; 4525 /* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */ 4526 for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) 4527 { 4528 if (state->vs_consts_b[i]) 4529 args->clip.boolclip.bools |= (1u << i); 4530 } 4531 4532 args->vertex.samplers[0] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 0]; 4533 args->vertex.samplers[1] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 1]; 4534 args->vertex.samplers[2] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 2]; 4535 args->vertex.samplers[3] = 0; 4536 4537 /* Skip if unused or local */ 4538 int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts; 4539 /* This is about flow control, not clipping. 
*/ 4540 if (int_skip == 0xffff || gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 4541 { 4542 memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 4543 return; 4544 } 4545 4546 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 4547 { 4548 if (int_skip & (1u << i)) 4549 { 4550 args->loop_ctrl[i][0] = 0; 4551 args->loop_ctrl[i][1] = 0; 4552 args->loop_ctrl[i][2] = 0; 4553 } 4554 else 4555 { 4556 args->loop_ctrl[i][0] = state->vs_consts_i[i].x; 4557 args->loop_ctrl[i][1] = state->vs_consts_i[i].y; 4558 args->loop_ctrl[i][2] = state->vs_consts_i[i].z; 4559 } 4560 } 4561 } 4562 4563 /* Context activation is done by the caller. */ 4564 static void shader_arb_select(void *shader_priv, struct wined3d_context *context, 4565 const struct wined3d_state *state) 4566 { 4567 struct shader_arb_priv *priv = shader_priv; 4568 const struct wined3d_gl_info *gl_info = context->gl_info; 4569 int i; 4570 4571 /* Deal with pixel shaders first so the vertex shader arg function has the input signature ready */ 4572 if (use_ps(state)) 4573 { 4574 struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 4575 struct arb_ps_compile_args compile_args; 4576 struct arb_ps_compiled_shader *compiled; 4577 4578 TRACE("Using pixel shader %p.\n", ps); 4579 find_arb_ps_compile_args(state, context, ps, &compile_args); 4580 compiled = find_arb_pshader(ps, &compile_args); 4581 priv->current_fprogram_id = compiled->prgId; 4582 priv->compiled_fprog = compiled; 4583 4584 /* Bind the fragment program */ 4585 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id)); 4586 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);"); 4587 4588 if (!priv->use_arbfp_fixed_func) 4589 priv->fragment_pipe->enable_extension(gl_info, FALSE); 4590 4591 /* Enable OpenGL fragment programs. 
*/ 4592 gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB); 4593 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);"); 4594 4595 TRACE("Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", priv->current_fprogram_id); 4596 4597 /* Pixel Shader 1.x constants are clamped to [-1;1], Pixel Shader 2.0 constants are not. If switching between 4598 * a 1.x and newer shader, reload the first 8 constants 4599 */ 4600 if (priv->last_ps_const_clamped != ((struct arb_pshader_private *)ps->backend_data)->clamp_consts) 4601 { 4602 priv->last_ps_const_clamped = ((struct arb_pshader_private *)ps->backend_data)->clamp_consts; 4603 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, 8); 4604 for(i = 0; i < 8; i++) 4605 { 4606 priv->pshader_const_dirty[i] = 1; 4607 } 4608 /* Also takes care of loading local constants */ 4609 shader_arb_load_constants_internal(shader_priv, context, state, TRUE, FALSE, TRUE); 4610 } 4611 else 4612 { 4613 UINT rt_height = state->fb->render_targets[0]->height; 4614 shader_arb_ps_local_constants(compiled, context, state, rt_height); 4615 } 4616 4617 /* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */ 4618 if (compiled->np2fixup_info.super.active) 4619 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; 4620 4621 if (ps->load_local_constsF) 4622 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F; 4623 } 4624 else 4625 { 4626 if (gl_info->supported[ARB_FRAGMENT_PROGRAM] && !priv->use_arbfp_fixed_func) 4627 { 4628 /* Disable only if we're not using arbfp fixed function fragment 4629 * processing. If this is used, keep GL_FRAGMENT_PROGRAM_ARB 4630 * enabled, and the fixed function pipeline will bind the fixed 4631 * function replacement shader. 
*/ 4632 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 4633 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 4634 priv->current_fprogram_id = 0; 4635 } 4636 priv->fragment_pipe->enable_extension(gl_info, TRUE); 4637 } 4638 4639 if (use_vs(state)) 4640 { 4641 struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; 4642 struct arb_vs_compile_args compile_args; 4643 struct arb_vs_compiled_shader *compiled; 4644 const struct wined3d_shader_signature *ps_input_sig; 4645 4646 TRACE("Using vertex shader %p\n", vs); 4647 find_arb_vs_compile_args(state, context, vs, &compile_args); 4648 4649 /* Instead of searching for the signature in the signature list, read the one from the 4650 * current pixel shader. It's maybe not the shader where the signature came from, but it 4651 * is the same signature and faster to find. */ 4652 if (compile_args.ps_signature == ~0U) 4653 ps_input_sig = NULL; 4654 else 4655 ps_input_sig = &state->shader[WINED3D_SHADER_TYPE_PIXEL]->input_signature; 4656 4657 compiled = find_arb_vshader(vs, context->gl_info, context->stream_info.use_map, 4658 &compile_args, ps_input_sig); 4659 priv->current_vprogram_id = compiled->prgId; 4660 priv->compiled_vprog = compiled; 4661 4662 /* Bind the vertex program */ 4663 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id)); 4664 checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);"); 4665 4666 priv->vertex_pipe->vp_enable(gl_info, FALSE); 4667 4668 /* Enable OpenGL vertex programs */ 4669 gl_info->gl_ops.gl.p_glEnable(GL_VERTEX_PROGRAM_ARB); 4670 checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);"); 4671 TRACE("Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", priv->current_vprogram_id); 4672 shader_arb_vs_local_constants(compiled, context, state); 4673 4674 if(priv->last_vs_color_unclamp != compiled->need_color_unclamp) { 4675 priv->last_vs_color_unclamp = compiled->need_color_unclamp; 4676 4677 if 
(gl_info->supported[ARB_COLOR_BUFFER_FLOAT]) 4678 { 4679 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, !compiled->need_color_unclamp)); 4680 checkGLcall("glClampColorARB"); 4681 } else { 4682 FIXME("vertex color clamp needs to be changed, but extension not supported.\n"); 4683 } 4684 } 4685 4686 if (vs->load_local_constsF) 4687 context->constant_update_mask |= WINED3D_SHADER_CONST_VS_F; 4688 } 4689 else 4690 { 4691 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4692 { 4693 priv->current_vprogram_id = 0; 4694 gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB); 4695 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 4696 } 4697 priv->vertex_pipe->vp_enable(gl_info, TRUE); 4698 } 4699 } 4700 4701 static void shader_arb_select_compute(void *shader_priv, struct wined3d_context *context, 4702 const struct wined3d_state *state) 4703 { 4704 ERR("Compute pipeline not supported by the ARB shader backend.\n"); 4705 } 4706 4707 /* Context activation is done by the caller. */ 4708 static void shader_arb_disable(void *shader_priv, struct wined3d_context *context) 4709 { 4710 const struct wined3d_gl_info *gl_info = context->gl_info; 4711 struct shader_arb_priv *priv = shader_priv; 4712 4713 if (gl_info->supported[ARB_FRAGMENT_PROGRAM]) 4714 { 4715 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 4716 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 4717 priv->current_fprogram_id = 0; 4718 } 4719 priv->fragment_pipe->enable_extension(gl_info, FALSE); 4720 4721 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4722 { 4723 priv->current_vprogram_id = 0; 4724 gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB); 4725 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 4726 } 4727 priv->vertex_pipe->vp_enable(gl_info, FALSE); 4728 4729 if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT] && priv->last_vs_color_unclamp) 4730 { 4731 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, GL_FIXED_ONLY_ARB)); 4732 checkGLcall("glClampColorARB"); 4733 priv->last_vs_color_unclamp = FALSE; 
4734 } 4735 4736 context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL) 4737 | (1u << WINED3D_SHADER_TYPE_VERTEX) 4738 | (1u << WINED3D_SHADER_TYPE_GEOMETRY) 4739 | (1u << WINED3D_SHADER_TYPE_HULL) 4740 | (1u << WINED3D_SHADER_TYPE_DOMAIN) 4741 | (1u << WINED3D_SHADER_TYPE_COMPUTE); 4742 } 4743 4744 static void shader_arb_destroy(struct wined3d_shader *shader) 4745 { 4746 struct wined3d_device *device = shader->device; 4747 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 4748 4749 if (shader_is_pshader_version(shader->reg_maps.shader_version.type)) 4750 { 4751 struct arb_pshader_private *shader_data = shader->backend_data; 4752 UINT i; 4753 4754 if(!shader_data) return; /* This can happen if a shader was never compiled */ 4755 4756 if (shader_data->num_gl_shaders) 4757 { 4758 struct wined3d_context *context = context_acquire(device, NULL, 0); 4759 4760 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4761 { 4762 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 4763 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 4764 } 4765 4766 context_release(context); 4767 } 4768 4769 heap_free(shader_data->gl_shaders); 4770 heap_free(shader_data); 4771 shader->backend_data = NULL; 4772 } 4773 else 4774 { 4775 struct arb_vshader_private *shader_data = shader->backend_data; 4776 UINT i; 4777 4778 if(!shader_data) return; /* This can happen if a shader was never compiled */ 4779 4780 if (shader_data->num_gl_shaders) 4781 { 4782 struct wined3d_context *context = context_acquire(device, NULL, 0); 4783 4784 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4785 { 4786 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 4787 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 4788 } 4789 4790 context_release(context); 4791 } 4792 4793 heap_free(shader_data->gl_shaders); 4794 heap_free(shader_data); 4795 shader->backend_data = NULL; 4796 } 4797 } 
4798 4799 static int sig_tree_compare(const void *key, const struct wine_rb_entry *entry) 4800 { 4801 struct ps_signature *e = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 4802 return compare_sig(key, &e->sig); 4803 } 4804 4805 static HRESULT shader_arb_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe, 4806 const struct fragment_pipeline *fragment_pipe) 4807 { 4808 const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info; 4809 struct fragment_caps fragment_caps; 4810 void *vertex_priv, *fragment_priv; 4811 struct shader_arb_priv *priv; 4812 4813 if (!(priv = heap_alloc_zero(sizeof(*priv)))) 4814 return E_OUTOFMEMORY; 4815 4816 if (!(vertex_priv = vertex_pipe->vp_alloc(&arb_program_shader_backend, priv))) 4817 { 4818 ERR("Failed to initialize vertex pipe.\n"); 4819 heap_free(priv); 4820 return E_FAIL; 4821 } 4822 4823 if (!(fragment_priv = fragment_pipe->alloc_private(&arb_program_shader_backend, priv))) 4824 { 4825 ERR("Failed to initialize fragment pipe.\n"); 4826 vertex_pipe->vp_free(device); 4827 heap_free(priv); 4828 return E_FAIL; 4829 } 4830 4831 memset(priv->vshader_const_dirty, 1, 4832 sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count); 4833 memset(priv->pshader_const_dirty, 1, 4834 sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count); 4835 4836 wine_rb_init(&priv->signature_tree, sig_tree_compare); 4837 4838 priv->vertex_pipe = vertex_pipe; 4839 priv->fragment_pipe = fragment_pipe; 4840 fragment_pipe->get_caps(&device->adapter->gl_info, &fragment_caps); 4841 priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL; 4842 4843 device->vertex_priv = vertex_priv; 4844 device->fragment_priv = fragment_priv; 4845 device->shader_priv = priv; 4846 4847 return WINED3D_OK; 4848 } 4849 4850 static void release_signature(struct wine_rb_entry *entry, void *context) 4851 { 4852 struct ps_signature *sig = WINE_RB_ENTRY_VALUE(entry, struct 
ps_signature, entry); 4853 unsigned int i; 4854 4855 for (i = 0; i < sig->sig.element_count; ++i) 4856 { 4857 heap_free((char *)sig->sig.elements[i].semantic_name); 4858 } 4859 heap_free(sig->sig.elements); 4860 heap_free(sig); 4861 } 4862 4863 /* Context activation is done by the caller. */ 4864 static void shader_arb_free(struct wined3d_device *device) 4865 { 4866 struct shader_arb_priv *priv = device->shader_priv; 4867 4868 wine_rb_destroy(&priv->signature_tree, release_signature, NULL); 4869 priv->fragment_pipe->free_private(device); 4870 priv->vertex_pipe->vp_free(device); 4871 heap_free(device->shader_priv); 4872 } 4873 4874 static BOOL shader_arb_allocate_context_data(struct wined3d_context *context) 4875 { 4876 return TRUE; 4877 } 4878 4879 static void shader_arb_free_context_data(struct wined3d_context *context) 4880 { 4881 struct shader_arb_priv *priv; 4882 4883 priv = context->device->shader_priv; 4884 if (priv->last_context == context) 4885 priv->last_context = NULL; 4886 } 4887 4888 static void shader_arb_init_context_state(struct wined3d_context *context) {} 4889 4890 static void shader_arb_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps) 4891 { 4892 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4893 { 4894 DWORD vs_consts; 4895 UINT vs_version; 4896 4897 /* 96 is the minimum allowed value of MAX_PROGRAM_ENV_PARAMETERS_ARB 4898 * for vertex programs. If the native limit is less than that it's 4899 * not very useful, and e.g. Mesa swrast returns 0, probably to 4900 * indicate it's a software implementation. 
*/ 4901 if (gl_info->limits.arb_vs_native_constants < 96) 4902 vs_consts = gl_info->limits.arb_vs_float_constants; 4903 else 4904 vs_consts = min(gl_info->limits.arb_vs_float_constants, gl_info->limits.arb_vs_native_constants); 4905 4906 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 4907 { 4908 vs_version = 3; 4909 TRACE("Hardware vertex shader version 3.0 enabled (NV_VERTEX_PROGRAM3)\n"); 4910 } 4911 else if (vs_consts >= 256) 4912 { 4913 /* Shader Model 2.0 requires at least 256 vertex shader constants */ 4914 vs_version = 2; 4915 TRACE("Hardware vertex shader version 2.0 enabled (ARB_PROGRAM)\n"); 4916 } 4917 else 4918 { 4919 vs_version = 1; 4920 TRACE("Hardware vertex shader version 1.1 enabled (ARB_PROGRAM)\n"); 4921 } 4922 caps->vs_version = min(wined3d_settings.max_sm_vs, vs_version); 4923 caps->vs_uniform_count = min(WINED3D_MAX_VS_CONSTS_F, vs_consts); 4924 } 4925 else 4926 { 4927 caps->vs_version = 0; 4928 caps->vs_uniform_count = 0; 4929 } 4930 4931 caps->hs_version = 0; 4932 caps->ds_version = 0; 4933 caps->gs_version = 0; 4934 caps->cs_version = 0; 4935 4936 if (gl_info->supported[ARB_FRAGMENT_PROGRAM]) 4937 { 4938 DWORD ps_consts; 4939 UINT ps_version; 4940 4941 /* Similar as above for vertex programs, but the minimum for fragment 4942 * programs is 24. 
*/ 4943 if (gl_info->limits.arb_ps_native_constants < 24) 4944 ps_consts = gl_info->limits.arb_ps_float_constants; 4945 else 4946 ps_consts = min(gl_info->limits.arb_ps_float_constants, gl_info->limits.arb_ps_native_constants); 4947 4948 if (gl_info->supported[NV_FRAGMENT_PROGRAM2]) 4949 { 4950 ps_version = 3; 4951 TRACE("Hardware pixel shader version 3.0 enabled (NV_FRAGMENT_PROGRAM2)\n"); 4952 } 4953 else if (ps_consts >= 32) 4954 { 4955 /* Shader Model 2.0 requires at least 32 pixel shader constants */ 4956 ps_version = 2; 4957 TRACE("Hardware pixel shader version 2.0 enabled (ARB_PROGRAM)\n"); 4958 } 4959 else 4960 { 4961 ps_version = 1; 4962 TRACE("Hardware pixel shader version 1.4 enabled (ARB_PROGRAM)\n"); 4963 } 4964 caps->ps_version = min(wined3d_settings.max_sm_ps, ps_version); 4965 caps->ps_uniform_count = min(WINED3D_MAX_PS_CONSTS_F, ps_consts); 4966 caps->ps_1x_max_value = 8.0f; 4967 } 4968 else 4969 { 4970 caps->ps_version = 0; 4971 caps->ps_uniform_count = 0; 4972 caps->ps_1x_max_value = 0.0f; 4973 } 4974 4975 caps->varying_count = 0; 4976 caps->wined3d_caps = WINED3D_SHADER_CAP_SRGB_WRITE; 4977 if (use_nv_clip(gl_info)) 4978 caps->wined3d_caps |= WINED3D_SHADER_CAP_VS_CLIPPING; 4979 } 4980 4981 static BOOL shader_arb_color_fixup_supported(struct color_fixup_desc fixup) 4982 { 4983 /* We support everything except complex conversions. 
*/ 4984 return !is_complex_fixup(fixup); 4985 } 4986 4987 static void shader_arb_add_instruction_modifiers(const struct wined3d_shader_instruction *ins) { 4988 DWORD shift; 4989 char write_mask[20], regstr[50]; 4990 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 4991 BOOL is_color = FALSE; 4992 const struct wined3d_shader_dst_param *dst; 4993 4994 if (!ins->dst_count) return; 4995 4996 dst = &ins->dst[0]; 4997 shift = dst->shift; 4998 if (!shift) return; /* Saturate alone is handled by the instructions */ 4999 5000 shader_arb_get_write_mask(ins, dst, write_mask); 5001 shader_arb_get_register_name(ins, &dst->reg, regstr, &is_color); 5002 5003 /* Generate a line that does the output modifier computation 5004 * FIXME: _SAT vs shift? _SAT alone is already handled in the instructions, if this 5005 * maps problems in e.g. _d4_sat modify shader_arb_get_modifier 5006 */ 5007 shader_addline(buffer, "MUL%s %s%s, %s, %s;\n", shader_arb_get_modifier(ins), 5008 regstr, write_mask, regstr, shift_tab[shift]); 5009 } 5010 5011 static const SHADER_HANDLER shader_arb_instruction_handler_table[WINED3DSIH_TABLE_SIZE] = 5012 { 5013 /* WINED3DSIH_ABS */ shader_hw_map2gl, 5014 /* WINED3DSIH_ADD */ shader_hw_map2gl, 5015 /* WINED3DSIH_AND */ NULL, 5016 /* WINED3DSIH_ATOMIC_AND */ NULL, 5017 /* WINED3DSIH_ATOMIC_CMP_STORE */ NULL, 5018 /* WINED3DSIH_ATOMIC_IADD */ NULL, 5019 /* WINED3DSIH_ATOMIC_IMAX */ NULL, 5020 /* WINED3DSIH_ATOMIC_IMIN */ NULL, 5021 /* WINED3DSIH_ATOMIC_OR */ NULL, 5022 /* WINED3DSIH_ATOMIC_UMAX */ NULL, 5023 /* WINED3DSIH_ATOMIC_UMIN */ NULL, 5024 /* WINED3DSIH_ATOMIC_XOR */ NULL, 5025 /* WINED3DSIH_BEM */ pshader_hw_bem, 5026 /* WINED3DSIH_BFI */ NULL, 5027 /* WINED3DSIH_BFREV */ NULL, 5028 /* WINED3DSIH_BREAK */ shader_hw_break, 5029 /* WINED3DSIH_BREAKC */ shader_hw_breakc, 5030 /* WINED3DSIH_BREAKP */ NULL, 5031 /* WINED3DSIH_BUFINFO */ NULL, 5032 /* WINED3DSIH_CALL */ shader_hw_call, 5033 /* WINED3DSIH_CALLNZ */ NULL, 5034 /* WINED3DSIH_CASE */ NULL, 
5035 /* WINED3DSIH_CMP */ pshader_hw_cmp, 5036 /* WINED3DSIH_CND */ pshader_hw_cnd, 5037 /* WINED3DSIH_CONTINUE */ NULL, 5038 /* WINED3DSIH_CONTINUEP */ NULL, 5039 /* WINED3DSIH_COUNTBITS */ NULL, 5040 /* WINED3DSIH_CRS */ shader_hw_map2gl, 5041 /* WINED3DSIH_CUT */ NULL, 5042 /* WINED3DSIH_CUT_STREAM */ NULL, 5043 /* WINED3DSIH_DCL */ shader_hw_nop, 5044 /* WINED3DSIH_DCL_CONSTANT_BUFFER */ shader_hw_nop, 5045 /* WINED3DSIH_DCL_FUNCTION_BODY */ NULL, 5046 /* WINED3DSIH_DCL_FUNCTION_TABLE */ NULL, 5047 /* WINED3DSIH_DCL_GLOBAL_FLAGS */ NULL, 5048 /* WINED3DSIH_DCL_GS_INSTANCES */ NULL, 5049 /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */ NULL, 5050 /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */ NULL, 5051 /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR */ NULL, 5052 /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER */ NULL, 5053 /* WINED3DSIH_DCL_INDEX_RANGE */ NULL, 5054 /* WINED3DSIH_DCL_INDEXABLE_TEMP */ NULL, 5055 /* WINED3DSIH_DCL_INPUT */ NULL, 5056 /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT */ NULL, 5057 /* WINED3DSIH_DCL_INPUT_PRIMITIVE */ shader_hw_nop, 5058 /* WINED3DSIH_DCL_INPUT_PS */ NULL, 5059 /* WINED3DSIH_DCL_INPUT_PS_SGV */ NULL, 5060 /* WINED3DSIH_DCL_INPUT_PS_SIV */ NULL, 5061 /* WINED3DSIH_DCL_INPUT_SGV */ NULL, 5062 /* WINED3DSIH_DCL_INPUT_SIV */ NULL, 5063 /* WINED3DSIH_DCL_INTERFACE */ NULL, 5064 /* WINED3DSIH_DCL_OUTPUT */ NULL, 5065 /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT */ NULL, 5066 /* WINED3DSIH_DCL_OUTPUT_SIV */ NULL, 5067 /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY */ shader_hw_nop, 5068 /* WINED3DSIH_DCL_RESOURCE_RAW */ NULL, 5069 /* WINED3DSIH_DCL_RESOURCE_STRUCTURED */ NULL, 5070 /* WINED3DSIH_DCL_SAMPLER */ NULL, 5071 /* WINED3DSIH_DCL_STREAM */ NULL, 5072 /* WINED3DSIH_DCL_TEMPS */ NULL, 5073 /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN */ NULL, 5074 /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */ NULL, 5075 /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING */ NULL, 5076 /* WINED3DSIH_DCL_TGSM_RAW */ NULL, 5077 /* WINED3DSIH_DCL_TGSM_STRUCTURED */ NULL, 5078 
/* WINED3DSIH_DCL_THREAD_GROUP */ NULL, 5079 /* WINED3DSIH_DCL_UAV_RAW */ NULL, 5080 /* WINED3DSIH_DCL_UAV_STRUCTURED */ NULL, 5081 /* WINED3DSIH_DCL_UAV_TYPED */ NULL, 5082 /* WINED3DSIH_DCL_VERTICES_OUT */ shader_hw_nop, 5083 /* WINED3DSIH_DEF */ shader_hw_nop, 5084 /* WINED3DSIH_DEFAULT */ NULL, 5085 /* WINED3DSIH_DEFB */ shader_hw_nop, 5086 /* WINED3DSIH_DEFI */ shader_hw_nop, 5087 /* WINED3DSIH_DIV */ NULL, 5088 /* WINED3DSIH_DP2 */ NULL, 5089 /* WINED3DSIH_DP2ADD */ pshader_hw_dp2add, 5090 /* WINED3DSIH_DP3 */ shader_hw_map2gl, 5091 /* WINED3DSIH_DP4 */ shader_hw_map2gl, 5092 /* WINED3DSIH_DST */ shader_hw_map2gl, 5093 /* WINED3DSIH_DSX */ shader_hw_map2gl, 5094 /* WINED3DSIH_DSX_COARSE */ NULL, 5095 /* WINED3DSIH_DSX_FINE */ NULL, 5096 /* WINED3DSIH_DSY */ shader_hw_dsy, 5097 /* WINED3DSIH_DSY_COARSE */ NULL, 5098 /* WINED3DSIH_DSY_FINE */ NULL, 5099 /* WINED3DSIH_EVAL_SAMPLE_INDEX */ NULL, 5100 /* WINED3DSIH_ELSE */ shader_hw_else, 5101 /* WINED3DSIH_EMIT */ NULL, 5102 /* WINED3DSIH_EMIT_STREAM */ NULL, 5103 /* WINED3DSIH_ENDIF */ shader_hw_endif, 5104 /* WINED3DSIH_ENDLOOP */ shader_hw_endloop, 5105 /* WINED3DSIH_ENDREP */ shader_hw_endrep, 5106 /* WINED3DSIH_ENDSWITCH */ NULL, 5107 /* WINED3DSIH_EQ */ NULL, 5108 /* WINED3DSIH_EXP */ shader_hw_scalar_op, 5109 /* WINED3DSIH_EXPP */ shader_hw_scalar_op, 5110 /* WINED3DSIH_F16TOF32 */ NULL, 5111 /* WINED3DSIH_F32TOF16 */ NULL, 5112 /* WINED3DSIH_FCALL */ NULL, 5113 /* WINED3DSIH_FIRSTBIT_HI */ NULL, 5114 /* WINED3DSIH_FIRSTBIT_LO */ NULL, 5115 /* WINED3DSIH_FIRSTBIT_SHI */ NULL, 5116 /* WINED3DSIH_FRC */ shader_hw_map2gl, 5117 /* WINED3DSIH_FTOI */ NULL, 5118 /* WINED3DSIH_FTOU */ NULL, 5119 /* WINED3DSIH_GATHER4 */ NULL, 5120 /* WINED3DSIH_GATHER4_C */ NULL, 5121 /* WINED3DSIH_GATHER4_PO */ NULL, 5122 /* WINED3DSIH_GATHER4_PO_C */ NULL, 5123 /* WINED3DSIH_GE */ NULL, 5124 /* WINED3DSIH_HS_CONTROL_POINT_PHASE */ NULL, 5125 /* WINED3DSIH_HS_DECLS */ NULL, 5126 /* WINED3DSIH_HS_FORK_PHASE */ NULL, 5127 /* 
WINED3DSIH_HS_JOIN_PHASE */ NULL, 5128 /* WINED3DSIH_IADD */ NULL, 5129 /* WINED3DSIH_IBFE */ NULL, 5130 /* WINED3DSIH_IEQ */ NULL, 5131 /* WINED3DSIH_IF */ NULL /* Hardcoded into the shader */, 5132 /* WINED3DSIH_IFC */ shader_hw_ifc, 5133 /* WINED3DSIH_IGE */ NULL, 5134 /* WINED3DSIH_ILT */ NULL, 5135 /* WINED3DSIH_IMAD */ NULL, 5136 /* WINED3DSIH_IMAX */ NULL, 5137 /* WINED3DSIH_IMIN */ NULL, 5138 /* WINED3DSIH_IMM_ATOMIC_ALLOC */ NULL, 5139 /* WINED3DSIH_IMM_ATOMIC_AND */ NULL, 5140 /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH */ NULL, 5141 /* WINED3DSIH_IMM_ATOMIC_CONSUME */ NULL, 5142 /* WINED3DSIH_IMM_ATOMIC_EXCH */ NULL, 5143 /* WINED3DSIH_IMM_ATOMIC_IADD */ NULL, 5144 /* WINED3DSIH_IMM_ATOMIC_IMAX */ NULL, 5145 /* WINED3DSIH_IMM_ATOMIC_IMIN */ NULL, 5146 /* WINED3DSIH_IMM_ATOMIC_OR */ NULL, 5147 /* WINED3DSIH_IMM_ATOMIC_UMAX */ NULL, 5148 /* WINED3DSIH_IMM_ATOMIC_UMIN */ NULL, 5149 /* WINED3DSIH_IMM_ATOMIC_XOR */ NULL, 5150 /* WINED3DSIH_IMUL */ NULL, 5151 /* WINED3DSIH_INE */ NULL, 5152 /* WINED3DSIH_INEG */ NULL, 5153 /* WINED3DSIH_ISHL */ NULL, 5154 /* WINED3DSIH_ISHR */ NULL, 5155 /* WINED3DSIH_ITOF */ NULL, 5156 /* WINED3DSIH_LABEL */ shader_hw_label, 5157 /* WINED3DSIH_LD */ NULL, 5158 /* WINED3DSIH_LD2DMS */ NULL, 5159 /* WINED3DSIH_LD_RAW */ NULL, 5160 /* WINED3DSIH_LD_STRUCTURED */ NULL, 5161 /* WINED3DSIH_LD_UAV_TYPED */ NULL, 5162 /* WINED3DSIH_LIT */ shader_hw_map2gl, 5163 /* WINED3DSIH_LOD */ NULL, 5164 /* WINED3DSIH_LOG */ shader_hw_scalar_op, 5165 /* WINED3DSIH_LOGP */ shader_hw_scalar_op, 5166 /* WINED3DSIH_LOOP */ shader_hw_loop, 5167 /* WINED3DSIH_LRP */ shader_hw_lrp, 5168 /* WINED3DSIH_LT */ NULL, 5169 /* WINED3DSIH_M3x2 */ shader_hw_mnxn, 5170 /* WINED3DSIH_M3x3 */ shader_hw_mnxn, 5171 /* WINED3DSIH_M3x4 */ shader_hw_mnxn, 5172 /* WINED3DSIH_M4x3 */ shader_hw_mnxn, 5173 /* WINED3DSIH_M4x4 */ shader_hw_mnxn, 5174 /* WINED3DSIH_MAD */ shader_hw_map2gl, 5175 /* WINED3DSIH_MAX */ shader_hw_map2gl, 5176 /* WINED3DSIH_MIN */ shader_hw_map2gl, 5177 /* 
WINED3DSIH_MOV */ shader_hw_mov, 5178 /* WINED3DSIH_MOVA */ shader_hw_mov, 5179 /* WINED3DSIH_MOVC */ NULL, 5180 /* WINED3DSIH_MUL */ shader_hw_map2gl, 5181 /* WINED3DSIH_NE */ NULL, 5182 /* WINED3DSIH_NOP */ shader_hw_nop, 5183 /* WINED3DSIH_NOT */ NULL, 5184 /* WINED3DSIH_NRM */ shader_hw_nrm, 5185 /* WINED3DSIH_OR */ NULL, 5186 /* WINED3DSIH_PHASE */ shader_hw_nop, 5187 /* WINED3DSIH_POW */ shader_hw_pow, 5188 /* WINED3DSIH_RCP */ shader_hw_scalar_op, 5189 /* WINED3DSIH_REP */ shader_hw_rep, 5190 /* WINED3DSIH_RESINFO */ NULL, 5191 /* WINED3DSIH_RET */ shader_hw_ret, 5192 /* WINED3DSIH_RETP */ NULL, 5193 /* WINED3DSIH_ROUND_NE */ NULL, 5194 /* WINED3DSIH_ROUND_NI */ NULL, 5195 /* WINED3DSIH_ROUND_PI */ NULL, 5196 /* WINED3DSIH_ROUND_Z */ NULL, 5197 /* WINED3DSIH_RSQ */ shader_hw_scalar_op, 5198 /* WINED3DSIH_SAMPLE */ NULL, 5199 /* WINED3DSIH_SAMPLE_B */ NULL, 5200 /* WINED3DSIH_SAMPLE_C */ NULL, 5201 /* WINED3DSIH_SAMPLE_C_LZ */ NULL, 5202 /* WINED3DSIH_SAMPLE_GRAD */ NULL, 5203 /* WINED3DSIH_SAMPLE_INFO */ NULL, 5204 /* WINED3DSIH_SAMPLE_LOD */ NULL, 5205 /* WINED3DSIH_SAMPLE_POS */ NULL, 5206 /* WINED3DSIH_SETP */ NULL, 5207 /* WINED3DSIH_SGE */ shader_hw_map2gl, 5208 /* WINED3DSIH_SGN */ shader_hw_sgn, 5209 /* WINED3DSIH_SINCOS */ shader_hw_sincos, 5210 /* WINED3DSIH_SLT */ shader_hw_map2gl, 5211 /* WINED3DSIH_SQRT */ NULL, 5212 /* WINED3DSIH_STORE_RAW */ NULL, 5213 /* WINED3DSIH_STORE_STRUCTURED */ NULL, 5214 /* WINED3DSIH_STORE_UAV_TYPED */ NULL, 5215 /* WINED3DSIH_SUB */ shader_hw_map2gl, 5216 /* WINED3DSIH_SWAPC */ NULL, 5217 /* WINED3DSIH_SWITCH */ NULL, 5218 /* WINED3DSIH_SYNC */ NULL, 5219 /* WINED3DSIH_TEX */ pshader_hw_tex, 5220 /* WINED3DSIH_TEXBEM */ pshader_hw_texbem, 5221 /* WINED3DSIH_TEXBEML */ pshader_hw_texbem, 5222 /* WINED3DSIH_TEXCOORD */ pshader_hw_texcoord, 5223 /* WINED3DSIH_TEXDEPTH */ pshader_hw_texdepth, 5224 /* WINED3DSIH_TEXDP3 */ pshader_hw_texdp3, 5225 /* WINED3DSIH_TEXDP3TEX */ pshader_hw_texdp3tex, 5226 /* WINED3DSIH_TEXKILL 
*/ pshader_hw_texkill, 5227 /* WINED3DSIH_TEXLDD */ shader_hw_texldd, 5228 /* WINED3DSIH_TEXLDL */ shader_hw_texldl, 5229 /* WINED3DSIH_TEXM3x2DEPTH */ pshader_hw_texm3x2depth, 5230 /* WINED3DSIH_TEXM3x2PAD */ pshader_hw_texm3x2pad, 5231 /* WINED3DSIH_TEXM3x2TEX */ pshader_hw_texm3x2tex, 5232 /* WINED3DSIH_TEXM3x3 */ pshader_hw_texm3x3, 5233 /* WINED3DSIH_TEXM3x3DIFF */ NULL, 5234 /* WINED3DSIH_TEXM3x3PAD */ pshader_hw_texm3x3pad, 5235 /* WINED3DSIH_TEXM3x3SPEC */ pshader_hw_texm3x3spec, 5236 /* WINED3DSIH_TEXM3x3TEX */ pshader_hw_texm3x3tex, 5237 /* WINED3DSIH_TEXM3x3VSPEC */ pshader_hw_texm3x3vspec, 5238 /* WINED3DSIH_TEXREG2AR */ pshader_hw_texreg2ar, 5239 /* WINED3DSIH_TEXREG2GB */ pshader_hw_texreg2gb, 5240 /* WINED3DSIH_TEXREG2RGB */ pshader_hw_texreg2rgb, 5241 /* WINED3DSIH_UBFE */ NULL, 5242 /* WINED3DSIH_UDIV */ NULL, 5243 /* WINED3DSIH_UGE */ NULL, 5244 /* WINED3DSIH_ULT */ NULL, 5245 /* WINED3DSIH_UMAX */ NULL, 5246 /* WINED3DSIH_UMIN */ NULL, 5247 /* WINED3DSIH_UMUL */ NULL, 5248 /* WINED3DSIH_USHR */ NULL, 5249 /* WINED3DSIH_UTOF */ NULL, 5250 /* WINED3DSIH_XOR */ NULL, 5251 }; 5252 5253 static BOOL get_bool_const(const struct wined3d_shader_instruction *ins, 5254 const struct wined3d_shader *shader, DWORD idx) 5255 { 5256 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 5257 BOOL vshader = shader_is_vshader_version(reg_maps->shader_version.type); 5258 const struct wined3d_shader_lconst *constant; 5259 WORD bools = 0; 5260 WORD flag = (1u << idx); 5261 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5262 5263 if (reg_maps->local_bool_consts & flag) 5264 { 5265 /* What good is an if(bool) with a hardcoded local constant? 
I don't know, but handle it */ 5266 LIST_FOR_EACH_ENTRY(constant, &shader->constantsB, struct wined3d_shader_lconst, entry) 5267 { 5268 if (constant->idx == idx) 5269 { 5270 return constant->value[0]; 5271 } 5272 } 5273 ERR("Local constant not found\n"); 5274 return FALSE; 5275 } 5276 else 5277 { 5278 if(vshader) bools = priv->cur_vs_args->clip.boolclip.bools; 5279 else bools = priv->cur_ps_args->bools; 5280 return bools & flag; 5281 } 5282 } 5283 5284 static void get_loop_control_const(const struct wined3d_shader_instruction *ins, 5285 const struct wined3d_shader *shader, UINT idx, struct wined3d_shader_loop_control *loop_control) 5286 { 5287 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 5288 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5289 5290 /* Integer constants can either be a local constant, or they can be stored in the shader 5291 * type specific compile args. */ 5292 if (reg_maps->local_int_consts & (1u << idx)) 5293 { 5294 const struct wined3d_shader_lconst *constant; 5295 5296 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 5297 { 5298 if (constant->idx == idx) 5299 { 5300 loop_control->count = constant->value[0]; 5301 loop_control->start = constant->value[1]; 5302 /* Step is signed. */ 5303 loop_control->step = (int)constant->value[2]; 5304 return; 5305 } 5306 } 5307 /* If this happens the flag was set incorrectly */ 5308 ERR("Local constant not found\n"); 5309 loop_control->count = 0; 5310 loop_control->start = 0; 5311 loop_control->step = 0; 5312 return; 5313 } 5314 5315 switch (reg_maps->shader_version.type) 5316 { 5317 case WINED3D_SHADER_TYPE_VERTEX: 5318 /* Count and aL start value are unsigned */ 5319 loop_control->count = priv->cur_vs_args->loop_ctrl[idx][0]; 5320 loop_control->start = priv->cur_vs_args->loop_ctrl[idx][1]; 5321 /* Step is signed. 
*/ 5322 loop_control->step = ((char)priv->cur_vs_args->loop_ctrl[idx][2]); 5323 break; 5324 5325 case WINED3D_SHADER_TYPE_PIXEL: 5326 loop_control->count = priv->cur_ps_args->loop_ctrl[idx][0]; 5327 loop_control->start = priv->cur_ps_args->loop_ctrl[idx][1]; 5328 loop_control->step = ((char)priv->cur_ps_args->loop_ctrl[idx][2]); 5329 break; 5330 5331 default: 5332 FIXME("Unhandled shader type %#x.\n", reg_maps->shader_version.type); 5333 break; 5334 } 5335 } 5336 5337 static void record_instruction(struct list *list, const struct wined3d_shader_instruction *ins) 5338 { 5339 struct wined3d_shader_src_param *src_param = NULL, *rel_addr; 5340 struct wined3d_shader_dst_param *dst_param; 5341 struct recorded_instruction *rec; 5342 unsigned int i; 5343 5344 if (!(rec = heap_alloc_zero(sizeof(*rec)))) 5345 { 5346 ERR("Out of memory\n"); 5347 return; 5348 } 5349 5350 rec->ins = *ins; 5351 if (!(dst_param = heap_alloc(sizeof(*dst_param)))) 5352 goto free; 5353 *dst_param = *ins->dst; 5354 if (ins->dst->reg.idx[0].rel_addr) 5355 { 5356 if (!(rel_addr = heap_alloc(sizeof(*rel_addr)))) 5357 goto free; 5358 *rel_addr = *ins->dst->reg.idx[0].rel_addr; 5359 dst_param->reg.idx[0].rel_addr = rel_addr; 5360 } 5361 rec->ins.dst = dst_param; 5362 5363 if (!(src_param = heap_calloc(ins->src_count, sizeof(*src_param)))) 5364 goto free; 5365 for (i = 0; i < ins->src_count; ++i) 5366 { 5367 src_param[i] = ins->src[i]; 5368 if (ins->src[i].reg.idx[0].rel_addr) 5369 { 5370 if (!(rel_addr = heap_alloc(sizeof(*rel_addr)))) 5371 goto free; 5372 *rel_addr = *ins->src[i].reg.idx[0].rel_addr; 5373 src_param[i].reg.idx[0].rel_addr = rel_addr; 5374 } 5375 } 5376 rec->ins.src = src_param; 5377 list_add_tail(list, &rec->entry); 5378 return; 5379 5380 free: 5381 ERR("Out of memory\n"); 5382 if (dst_param) 5383 { 5384 heap_free((void *)dst_param->reg.idx[0].rel_addr); 5385 heap_free(dst_param); 5386 } 5387 if (src_param) 5388 { 5389 for (i = 0; i < ins->src_count; ++i) 5390 { 5391 heap_free((void 
*)src_param[i].reg.idx[0].rel_addr); 5392 } 5393 heap_free(src_param); 5394 } 5395 heap_free(rec); 5396 } 5397 5398 static void free_recorded_instruction(struct list *list) 5399 { 5400 struct recorded_instruction *rec_ins, *entry2; 5401 unsigned int i; 5402 5403 LIST_FOR_EACH_ENTRY_SAFE(rec_ins, entry2, list, struct recorded_instruction, entry) 5404 { 5405 list_remove(&rec_ins->entry); 5406 if (rec_ins->ins.dst) 5407 { 5408 heap_free((void *)rec_ins->ins.dst->reg.idx[0].rel_addr); 5409 heap_free((void *)rec_ins->ins.dst); 5410 } 5411 if (rec_ins->ins.src) 5412 { 5413 for (i = 0; i < rec_ins->ins.src_count; ++i) 5414 { 5415 heap_free((void *)rec_ins->ins.src[i].reg.idx[0].rel_addr); 5416 } 5417 heap_free((void *)rec_ins->ins.src); 5418 } 5419 heap_free(rec_ins); 5420 } 5421 } 5422 5423 static void pop_control_frame(const struct wined3d_shader_instruction *ins) 5424 { 5425 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5426 struct control_frame *control_frame; 5427 5428 if (ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 5429 { 5430 struct list *e = list_head(&priv->control_frames); 5431 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5432 list_remove(&control_frame->entry); 5433 heap_free(control_frame); 5434 priv->loop_depth--; 5435 } 5436 else if (ins->handler_idx == WINED3DSIH_ENDIF) 5437 { 5438 /* Non-ifc ENDIFs were already handled previously. 
*/ 5439 struct list *e = list_head(&priv->control_frames); 5440 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5441 list_remove(&control_frame->entry); 5442 heap_free(control_frame); 5443 } 5444 } 5445 5446 static void shader_arb_handle_instruction(const struct wined3d_shader_instruction *ins) { 5447 SHADER_HANDLER hw_fct; 5448 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5449 const struct wined3d_shader *shader = ins->ctx->shader; 5450 struct control_frame *control_frame; 5451 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 5452 BOOL bool_const; 5453 5454 if(ins->handler_idx == WINED3DSIH_LOOP || ins->handler_idx == WINED3DSIH_REP) 5455 { 5456 control_frame = heap_alloc_zero(sizeof(*control_frame)); 5457 list_add_head(&priv->control_frames, &control_frame->entry); 5458 5459 if(ins->handler_idx == WINED3DSIH_LOOP) control_frame->type = LOOP; 5460 if(ins->handler_idx == WINED3DSIH_REP) control_frame->type = REP; 5461 5462 if(priv->target_version >= NV2) 5463 { 5464 control_frame->no.loop = priv->num_loops++; 5465 priv->loop_depth++; 5466 } 5467 else 5468 { 5469 /* Don't bother recording when we're in a not used if branch */ 5470 if(priv->muted) 5471 { 5472 return; 5473 } 5474 5475 if(!priv->recording) 5476 { 5477 list_init(&priv->record); 5478 priv->recording = TRUE; 5479 control_frame->outer_loop = TRUE; 5480 get_loop_control_const(ins, shader, ins->src[0].reg.idx[0].offset, &control_frame->loop_control); 5481 return; /* Instruction is handled */ 5482 } 5483 /* Record this loop in the outer loop's recording */ 5484 } 5485 } 5486 else if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 5487 { 5488 if(priv->target_version >= NV2) 5489 { 5490 /* Nothing to do. 
The control frame is popped after the HW instr handler */ 5491 } 5492 else 5493 { 5494 struct list *e = list_head(&priv->control_frames); 5495 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5496 list_remove(&control_frame->entry); 5497 5498 if(control_frame->outer_loop) 5499 { 5500 unsigned int iteration; 5501 int aL = 0; 5502 struct list copy; 5503 5504 /* Turn off recording before playback */ 5505 priv->recording = FALSE; 5506 5507 /* Move the recorded instructions to a separate list and get them out of the private data 5508 * structure. If there are nested loops, the shader_arb_handle_instruction below will 5509 * be recorded again, thus priv->record might be overwritten 5510 */ 5511 list_init(©); 5512 list_move_tail(©, &priv->record); 5513 list_init(&priv->record); 5514 5515 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5516 { 5517 shader_addline(buffer, "#unrolling loop: %u iterations, aL=%u, inc %d\n", 5518 control_frame->loop_control.count, control_frame->loop_control.start, 5519 control_frame->loop_control.step); 5520 aL = control_frame->loop_control.start; 5521 } 5522 else 5523 { 5524 shader_addline(buffer, "#unrolling rep: %u iterations\n", control_frame->loop_control.count); 5525 } 5526 5527 for (iteration = 0; iteration < control_frame->loop_control.count; ++iteration) 5528 { 5529 struct recorded_instruction *rec_ins; 5530 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5531 { 5532 priv->aL = aL; 5533 shader_addline(buffer, "#Iteration %u, aL=%d\n", iteration, aL); 5534 } 5535 else 5536 { 5537 shader_addline(buffer, "#Iteration %u\n", iteration); 5538 } 5539 5540 LIST_FOR_EACH_ENTRY(rec_ins, ©, struct recorded_instruction, entry) 5541 { 5542 shader_arb_handle_instruction(&rec_ins->ins); 5543 } 5544 5545 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5546 { 5547 aL += control_frame->loop_control.step; 5548 } 5549 } 5550 shader_addline(buffer, "#end loop/rep\n"); 5551 5552 free_recorded_instruction(©); 5553 heap_free(control_frame); 5554 return; /* 
Instruction is handled */ 5555 } 5556 else 5557 { 5558 /* This is a nested loop. Proceed to the normal recording function */ 5559 heap_free(control_frame); 5560 } 5561 } 5562 } 5563 5564 if(priv->recording) 5565 { 5566 record_instruction(&priv->record, ins); 5567 return; 5568 } 5569 5570 /* boolean if */ 5571 if(ins->handler_idx == WINED3DSIH_IF) 5572 { 5573 control_frame = heap_alloc_zero(sizeof(*control_frame)); 5574 list_add_head(&priv->control_frames, &control_frame->entry); 5575 control_frame->type = IF; 5576 5577 bool_const = get_bool_const(ins, shader, ins->src[0].reg.idx[0].offset); 5578 if (ins->src[0].modifiers == WINED3DSPSM_NOT) 5579 bool_const = !bool_const; 5580 if (!priv->muted && !bool_const) 5581 { 5582 shader_addline(buffer, "#if(FALSE){\n"); 5583 priv->muted = TRUE; 5584 control_frame->muting = TRUE; 5585 } 5586 else shader_addline(buffer, "#if(TRUE) {\n"); 5587 5588 return; /* Instruction is handled */ 5589 } 5590 else if(ins->handler_idx == WINED3DSIH_IFC) 5591 { 5592 /* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */ 5593 control_frame = heap_alloc_zero(sizeof(*control_frame)); 5594 control_frame->type = IFC; 5595 control_frame->no.ifc = priv->num_ifcs++; 5596 list_add_head(&priv->control_frames, &control_frame->entry); 5597 } 5598 else if(ins->handler_idx == WINED3DSIH_ELSE) 5599 { 5600 struct list *e = list_head(&priv->control_frames); 5601 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5602 5603 if(control_frame->type == IF) 5604 { 5605 shader_addline(buffer, "#} else {\n"); 5606 if(!priv->muted && !control_frame->muting) 5607 { 5608 priv->muted = TRUE; 5609 control_frame->muting = TRUE; 5610 } 5611 else if(control_frame->muting) priv->muted = FALSE; 5612 return; /* Instruction is handled. 
*/ 5613 } 5614 /* In case of an ifc, generate a HW shader instruction */ 5615 if (control_frame->type != IFC) 5616 ERR("Control frame does not match.\n"); 5617 } 5618 else if(ins->handler_idx == WINED3DSIH_ENDIF) 5619 { 5620 struct list *e = list_head(&priv->control_frames); 5621 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5622 5623 if(control_frame->type == IF) 5624 { 5625 shader_addline(buffer, "#} endif\n"); 5626 if(control_frame->muting) priv->muted = FALSE; 5627 list_remove(&control_frame->entry); 5628 heap_free(control_frame); 5629 return; /* Instruction is handled */ 5630 } 5631 /* In case of an ifc, generate a HW shader instruction */ 5632 if (control_frame->type != IFC) 5633 ERR("Control frame does not match.\n"); 5634 } 5635 5636 if(priv->muted) 5637 { 5638 pop_control_frame(ins); 5639 return; 5640 } 5641 5642 /* Select handler */ 5643 hw_fct = shader_arb_instruction_handler_table[ins->handler_idx]; 5644 5645 /* Unhandled opcode */ 5646 if (!hw_fct) 5647 { 5648 FIXME("Backend can't handle opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 5649 return; 5650 } 5651 hw_fct(ins); 5652 5653 pop_control_frame(ins); 5654 5655 shader_arb_add_instruction_modifiers(ins); 5656 } 5657 5658 static BOOL shader_arb_has_ffp_proj_control(void *shader_priv) 5659 { 5660 struct shader_arb_priv *priv = shader_priv; 5661 5662 return priv->ffp_proj_control; 5663 } 5664 5665 static void shader_arb_precompile(void *shader_priv, struct wined3d_shader *shader) {} 5666 5667 const struct wined3d_shader_backend_ops arb_program_shader_backend = 5668 { 5669 shader_arb_handle_instruction, 5670 shader_arb_precompile, 5671 shader_arb_select, 5672 shader_arb_select_compute, 5673 shader_arb_disable, 5674 shader_arb_update_float_vertex_constants, 5675 shader_arb_update_float_pixel_constants, 5676 shader_arb_load_constants, 5677 shader_arb_destroy, 5678 shader_arb_alloc, 5679 shader_arb_free, 5680 shader_arb_allocate_context_data, 5681 
shader_arb_free_context_data, 5682 shader_arb_init_context_state, 5683 shader_arb_get_caps, 5684 shader_arb_color_fixup_supported, 5685 shader_arb_has_ffp_proj_control, 5686 }; 5687 5688 /* ARB_fragment_program fixed function pipeline replacement definitions */ 5689 #define ARB_FFP_CONST_TFACTOR 0 5690 #define ARB_FFP_CONST_COLOR_KEY_LOW ((ARB_FFP_CONST_TFACTOR) + 1) 5691 #define ARB_FFP_CONST_COLOR_KEY_HIGH ((ARB_FFP_CONST_COLOR_KEY_LOW) + 1) 5692 #define ARB_FFP_CONST_SPECULAR_ENABLE ((ARB_FFP_CONST_COLOR_KEY_HIGH) + 1) 5693 #define ARB_FFP_CONST_CONSTANT(i) ((ARB_FFP_CONST_SPECULAR_ENABLE) + 1 + i) 5694 #define ARB_FFP_CONST_BUMPMAT(i) ((ARB_FFP_CONST_CONSTANT(7)) + 1 + i) 5695 #define ARB_FFP_CONST_LUMINANCE(i) ((ARB_FFP_CONST_BUMPMAT(7)) + 1 + i) 5696 5697 struct arbfp_ffp_desc 5698 { 5699 struct ffp_frag_desc parent; 5700 GLuint shader; 5701 }; 5702 5703 /* Context activation is done by the caller. */ 5704 static void arbfp_enable(const struct wined3d_gl_info *gl_info, BOOL enable) 5705 { 5706 if (enable) 5707 { 5708 gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB); 5709 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 5710 } 5711 else 5712 { 5713 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 5714 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 5715 } 5716 } 5717 5718 static void *arbfp_alloc(const struct wined3d_shader_backend_ops *shader_backend, void *shader_priv) 5719 { 5720 struct shader_arb_priv *priv; 5721 5722 /* Share private data between the shader backend and the pipeline 5723 * replacement, if both are the arb implementation. This is needed to 5724 * figure out whether ARBfp should be disabled if no pixel shader is bound 5725 * or not. 
*/ 5726 if (shader_backend == &arb_program_shader_backend) 5727 priv = shader_priv; 5728 else if (!(priv = heap_alloc_zero(sizeof(*priv)))) 5729 return NULL; 5730 5731 wine_rb_init(&priv->fragment_shaders, wined3d_ffp_frag_program_key_compare); 5732 priv->use_arbfp_fixed_func = TRUE; 5733 5734 return priv; 5735 } 5736 5737 /* Context activation is done by the caller. */ 5738 static void arbfp_free_ffpshader(struct wine_rb_entry *entry, void *context) 5739 { 5740 const struct wined3d_gl_info *gl_info = context; 5741 struct arbfp_ffp_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_ffp_desc, parent.entry); 5742 5743 GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader)); 5744 checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)"); 5745 heap_free(entry_arb); 5746 } 5747 5748 /* Context activation is done by the caller. */ 5749 static void arbfp_free(struct wined3d_device *device) 5750 { 5751 struct shader_arb_priv *priv = device->fragment_priv; 5752 5753 wine_rb_destroy(&priv->fragment_shaders, arbfp_free_ffpshader, &device->adapter->gl_info); 5754 priv->use_arbfp_fixed_func = FALSE; 5755 5756 if (device->shader_backend != &arb_program_shader_backend) 5757 heap_free(device->fragment_priv); 5758 } 5759 5760 static void arbfp_get_caps(const struct wined3d_gl_info *gl_info, struct fragment_caps *caps) 5761 { 5762 caps->wined3d_caps = WINED3D_FRAGMENT_CAP_PROJ_CONTROL 5763 | WINED3D_FRAGMENT_CAP_SRGB_WRITE 5764 | WINED3D_FRAGMENT_CAP_COLOR_KEY; 5765 caps->PrimitiveMiscCaps = WINED3DPMISCCAPS_TSSARGTEMP; 5766 caps->TextureOpCaps = WINED3DTEXOPCAPS_DISABLE | 5767 WINED3DTEXOPCAPS_SELECTARG1 | 5768 WINED3DTEXOPCAPS_SELECTARG2 | 5769 WINED3DTEXOPCAPS_MODULATE4X | 5770 WINED3DTEXOPCAPS_MODULATE2X | 5771 WINED3DTEXOPCAPS_MODULATE | 5772 WINED3DTEXOPCAPS_ADDSIGNED2X | 5773 WINED3DTEXOPCAPS_ADDSIGNED | 5774 WINED3DTEXOPCAPS_ADD | 5775 WINED3DTEXOPCAPS_SUBTRACT | 5776 WINED3DTEXOPCAPS_ADDSMOOTH | 5777 WINED3DTEXOPCAPS_BLENDCURRENTALPHA | 5778 
WINED3DTEXOPCAPS_BLENDFACTORALPHA | 5779 WINED3DTEXOPCAPS_BLENDTEXTUREALPHA | 5780 WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA | 5781 WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM | 5782 WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR | 5783 WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA | 5784 WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA | 5785 WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR | 5786 WINED3DTEXOPCAPS_DOTPRODUCT3 | 5787 WINED3DTEXOPCAPS_MULTIPLYADD | 5788 WINED3DTEXOPCAPS_LERP | 5789 WINED3DTEXOPCAPS_BUMPENVMAP | 5790 WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE; 5791 5792 /* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */ 5793 5794 caps->MaxTextureBlendStages = MAX_TEXTURES; 5795 caps->MaxSimultaneousTextures = min(gl_info->limits.samplers[WINED3D_SHADER_TYPE_PIXEL], MAX_TEXTURES); 5796 } 5797 5798 static DWORD arbfp_get_emul_mask(const struct wined3d_gl_info *gl_info) 5799 { 5800 return GL_EXT_EMUL_ARB_MULTITEXTURE | GL_EXT_EMUL_EXT_FOG_COORD; 5801 } 5802 5803 static void state_texfactor_arbfp(struct wined3d_context *context, 5804 const struct wined3d_state *state, DWORD state_id) 5805 { 5806 const struct wined3d_gl_info *gl_info = context->gl_info; 5807 struct wined3d_device *device = context->device; 5808 struct wined3d_color color; 5809 5810 if (device->shader_backend == &arb_program_shader_backend) 5811 { 5812 struct shader_arb_priv *priv; 5813 5814 /* Don't load the parameter if we're using an arbfp pixel shader, 5815 * otherwise we'll overwrite application provided constants. 
*/ 5816 if (use_ps(state)) 5817 return; 5818 5819 priv = device->shader_priv; 5820 priv->pshader_const_dirty[ARB_FFP_CONST_TFACTOR] = 1; 5821 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_TFACTOR + 1); 5822 } 5823 5824 wined3d_color_from_d3dcolor(&color, state->render_states[WINED3D_RS_TEXTUREFACTOR]); 5825 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r)); 5826 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r)"); 5827 } 5828 5829 static void state_tss_constant_arbfp(struct wined3d_context *context, 5830 const struct wined3d_state *state, DWORD state_id) 5831 { 5832 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5833 const struct wined3d_gl_info *gl_info = context->gl_info; 5834 struct wined3d_device *device = context->device; 5835 struct wined3d_color color; 5836 5837 if (device->shader_backend == &arb_program_shader_backend) 5838 { 5839 struct shader_arb_priv *priv; 5840 5841 /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite 5842 * application provided constants. 
5843 */ 5844 if (use_ps(state)) 5845 return; 5846 5847 priv = device->shader_priv; 5848 priv->pshader_const_dirty[ARB_FFP_CONST_CONSTANT(stage)] = 1; 5849 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_CONSTANT(stage) + 1); 5850 } 5851 5852 wined3d_color_from_d3dcolor(&color, state->texture_states[stage][WINED3D_TSS_CONSTANT]); 5853 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_CONSTANT(stage), &color.r)); 5854 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_CONSTANT(stage), &color.r)"); 5855 } 5856 5857 static void state_arb_specularenable(struct wined3d_context *context, 5858 const struct wined3d_state *state, DWORD state_id) 5859 { 5860 const struct wined3d_gl_info *gl_info = context->gl_info; 5861 struct wined3d_device *device = context->device; 5862 float col[4]; 5863 5864 if (device->shader_backend == &arb_program_shader_backend) 5865 { 5866 struct shader_arb_priv *priv; 5867 5868 /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite 5869 * application provided constants. 
5870 */ 5871 if (use_ps(state)) 5872 return; 5873 5874 priv = device->shader_priv; 5875 priv->pshader_const_dirty[ARB_FFP_CONST_SPECULAR_ENABLE] = 1; 5876 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_SPECULAR_ENABLE + 1); 5877 } 5878 5879 if (state->render_states[WINED3D_RS_SPECULARENABLE]) 5880 { 5881 /* The specular color has no alpha */ 5882 col[0] = 1.0f; col[1] = 1.0f; 5883 col[2] = 1.0f; col[3] = 0.0f; 5884 } else { 5885 col[0] = 0.0f; col[1] = 0.0f; 5886 col[2] = 0.0f; col[3] = 0.0f; 5887 } 5888 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)); 5889 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)"); 5890 } 5891 5892 static void set_bumpmat_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 5893 { 5894 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5895 const struct wined3d_gl_info *gl_info = context->gl_info; 5896 struct wined3d_device *device = context->device; 5897 float mat[2][2]; 5898 5899 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV; 5900 5901 if (device->shader_backend == &arb_program_shader_backend) 5902 { 5903 struct shader_arb_priv *priv = device->shader_priv; 5904 5905 /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants. 
*/ 5906 if (use_ps(state)) 5907 return; 5908 5909 priv->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1; 5910 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1); 5911 } 5912 5913 mat[0][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT00]); 5914 mat[0][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT01]); 5915 mat[1][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT10]); 5916 mat[1][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT11]); 5917 5918 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])); 5919 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])"); 5920 } 5921 5922 static void tex_bumpenvlum_arbfp(struct wined3d_context *context, 5923 const struct wined3d_state *state, DWORD state_id) 5924 { 5925 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5926 const struct wined3d_gl_info *gl_info = context->gl_info; 5927 struct wined3d_device *device = context->device; 5928 float param[4]; 5929 5930 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV; 5931 5932 if (device->shader_backend == &arb_program_shader_backend) 5933 { 5934 struct shader_arb_priv *priv = device->shader_priv; 5935 5936 /* Exit now, don't set the luminance below, otherwise we may overwrite pixel shader constants. 
*/ 5937 if (use_ps(state)) 5938 return; 5939 5940 priv->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1; 5941 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1); 5942 } 5943 5944 param[0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LSCALE]); 5945 param[1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LOFFSET]); 5946 param[2] = 0.0f; 5947 param[3] = 0.0f; 5948 5949 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)); 5950 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)"); 5951 } 5952 5953 static void alpha_test_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 5954 { 5955 const struct wined3d_gl_info *gl_info = context->gl_info; 5956 int glParm; 5957 float ref; 5958 5959 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 5960 5961 if (state->render_states[WINED3D_RS_ALPHATESTENABLE]) 5962 { 5963 gl_info->gl_ops.gl.p_glEnable(GL_ALPHA_TEST); 5964 checkGLcall("glEnable GL_ALPHA_TEST"); 5965 } 5966 else 5967 { 5968 gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST); 5969 checkGLcall("glDisable GL_ALPHA_TEST"); 5970 return; 5971 } 5972 5973 ref = ((float)state->render_states[WINED3D_RS_ALPHAREF]) / 255.0f; 5974 glParm = wined3d_gl_compare_func(state->render_states[WINED3D_RS_ALPHAFUNC]); 5975 5976 if (glParm) 5977 { 5978 gl_info->gl_ops.gl.p_glAlphaFunc(glParm, ref); 5979 checkGLcall("glAlphaFunc"); 5980 } 5981 } 5982 5983 static void color_key_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 5984 { 5985 const struct wined3d_texture *texture = state->textures[0]; 5986 const struct wined3d_gl_info *gl_info = context->gl_info; 5987 struct wined3d_device *device = context->device; 5988 struct wined3d_color float_key[2]; 5989 5990 if (!texture) 5991 return; 5992 5993 if 
(device->shader_backend == &arb_program_shader_backend) 5994 { 5995 struct shader_arb_priv *priv; 5996 5997 /* Don't load the parameter if we're using an arbfp pixel shader, 5998 * otherwise we'll overwrite application provided constants. */ 5999 if (use_ps(state)) 6000 return; 6001 6002 priv = device->shader_priv; 6003 priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_LOW] = 1; 6004 priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_HIGH] = 1; 6005 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_COLOR_KEY_HIGH + 1); 6006 } 6007 6008 wined3d_format_get_float_color_key(texture->resource.format, &texture->async.src_blt_color_key, float_key); 6009 6010 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r)); 6011 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r)"); 6012 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r)); 6013 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r)"); 6014 } 6015 6016 static const char *get_argreg(struct wined3d_string_buffer *buffer, DWORD argnum, unsigned int stage, DWORD arg) 6017 { 6018 const char *ret; 6019 6020 if(arg == ARG_UNUSED) return "unused"; /* This is the marker for unused registers */ 6021 6022 switch(arg & WINED3DTA_SELECTMASK) { 6023 case WINED3DTA_DIFFUSE: 6024 ret = "fragment.color.primary"; break; 6025 6026 case WINED3DTA_CURRENT: 6027 ret = "ret"; 6028 break; 6029 6030 case WINED3DTA_TEXTURE: 6031 switch(stage) { 6032 case 0: ret = "tex0"; break; 6033 case 1: ret = "tex1"; break; 6034 case 2: ret = "tex2"; break; 6035 case 3: ret = "tex3"; break; 6036 case 4: ret = "tex4"; break; 6037 case 5: ret = "tex5"; break; 6038 case 6: ret = "tex6"; break; 6039 case 7: ret = "tex7"; break; 6040 default: ret = "unknown texture"; 6041 } 6042 break; 6043 6044 case 
WINED3DTA_TFACTOR: 6045 ret = "tfactor"; break; 6046 6047 case WINED3DTA_SPECULAR: 6048 ret = "fragment.color.secondary"; break; 6049 6050 case WINED3DTA_TEMP: 6051 ret = "tempreg"; break; 6052 6053 case WINED3DTA_CONSTANT: 6054 switch(stage) { 6055 case 0: ret = "const0"; break; 6056 case 1: ret = "const1"; break; 6057 case 2: ret = "const2"; break; 6058 case 3: ret = "const3"; break; 6059 case 4: ret = "const4"; break; 6060 case 5: ret = "const5"; break; 6061 case 6: ret = "const6"; break; 6062 case 7: ret = "const7"; break; 6063 default: ret = "unknown constant"; 6064 } 6065 break; 6066 6067 default: 6068 return "unknown"; 6069 } 6070 6071 if(arg & WINED3DTA_COMPLEMENT) { 6072 shader_addline(buffer, "SUB arg%u, const.x, %s;\n", argnum, ret); 6073 if(argnum == 0) ret = "arg0"; 6074 if(argnum == 1) ret = "arg1"; 6075 if(argnum == 2) ret = "arg2"; 6076 } 6077 if(arg & WINED3DTA_ALPHAREPLICATE) { 6078 shader_addline(buffer, "MOV arg%u, %s.w;\n", argnum, ret); 6079 if(argnum == 0) ret = "arg0"; 6080 if(argnum == 1) ret = "arg1"; 6081 if(argnum == 2) ret = "arg2"; 6082 } 6083 return ret; 6084 } 6085 6086 static void gen_ffp_instr(struct wined3d_string_buffer *buffer, unsigned int stage, BOOL color, 6087 BOOL alpha, DWORD dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2) 6088 { 6089 const char *dstmask, *dstreg, *arg0, *arg1, *arg2; 6090 unsigned int mul = 1; 6091 6092 if(color && alpha) dstmask = ""; 6093 else if(color) dstmask = ".xyz"; 6094 else dstmask = ".w"; 6095 6096 if(dst == tempreg) dstreg = "tempreg"; 6097 else dstreg = "ret"; 6098 6099 arg0 = get_argreg(buffer, 0, stage, dw_arg0); 6100 arg1 = get_argreg(buffer, 1, stage, dw_arg1); 6101 arg2 = get_argreg(buffer, 2, stage, dw_arg2); 6102 6103 switch (op) 6104 { 6105 case WINED3D_TOP_DISABLE: 6106 break; 6107 6108 case WINED3D_TOP_SELECT_ARG2: 6109 arg1 = arg2; 6110 /* FALLTHROUGH */ 6111 case WINED3D_TOP_SELECT_ARG1: 6112 shader_addline(buffer, "MOV %s%s, %s;\n", dstreg, dstmask, arg1); 6113 break; 
6114 6115 case WINED3D_TOP_MODULATE_4X: 6116 mul = 2; 6117 /* FALLTHROUGH */ 6118 case WINED3D_TOP_MODULATE_2X: 6119 mul *= 2; 6120 /* FALLTHROUGH */ 6121 case WINED3D_TOP_MODULATE: 6122 shader_addline(buffer, "MUL %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 6123 break; 6124 6125 case WINED3D_TOP_ADD_SIGNED_2X: 6126 mul = 2; 6127 /* FALLTHROUGH */ 6128 case WINED3D_TOP_ADD_SIGNED: 6129 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 6130 arg2 = "arg2"; 6131 /* FALLTHROUGH */ 6132 case WINED3D_TOP_ADD: 6133 shader_addline(buffer, "ADD_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 6134 break; 6135 6136 case WINED3D_TOP_SUBTRACT: 6137 shader_addline(buffer, "SUB_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 6138 break; 6139 6140 case WINED3D_TOP_ADD_SMOOTH: 6141 shader_addline(buffer, "SUB arg1, const.x, %s;\n", arg1); 6142 shader_addline(buffer, "MAD_SAT %s%s, arg1, %s, %s;\n", dstreg, dstmask, arg2, arg1); 6143 break; 6144 6145 case WINED3D_TOP_BLEND_CURRENT_ALPHA: 6146 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_CURRENT); 6147 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6148 break; 6149 case WINED3D_TOP_BLEND_FACTOR_ALPHA: 6150 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TFACTOR); 6151 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6152 break; 6153 case WINED3D_TOP_BLEND_TEXTURE_ALPHA: 6154 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 6155 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6156 break; 6157 case WINED3D_TOP_BLEND_DIFFUSE_ALPHA: 6158 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_DIFFUSE); 6159 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6160 break; 6161 6162 case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM: 6163 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 6164 shader_addline(buffer, "SUB arg0.w, const.x, %s.w;\n", arg0); 6165 
shader_addline(buffer, "MAD_SAT %s%s, %s, arg0.w, %s;\n", dstreg, dstmask, arg2, arg1); 6166 break; 6167 6168 /* D3DTOP_PREMODULATE ???? */ 6169 6170 case WINED3D_TOP_MODULATE_INVALPHA_ADD_COLOR: 6171 shader_addline(buffer, "SUB arg0.w, const.x, %s;\n", arg1); 6172 shader_addline(buffer, "MAD_SAT %s%s, arg0.w, %s, %s;\n", dstreg, dstmask, arg2, arg1); 6173 break; 6174 case WINED3D_TOP_MODULATE_ALPHA_ADD_COLOR: 6175 shader_addline(buffer, "MAD_SAT %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg1); 6176 break; 6177 case WINED3D_TOP_MODULATE_INVCOLOR_ADD_ALPHA: 6178 shader_addline(buffer, "SUB arg0, const.x, %s;\n", arg1); 6179 shader_addline(buffer, "MAD_SAT %s%s, arg0, %s, %s.w;\n", dstreg, dstmask, arg2, arg1); 6180 break; 6181 case WINED3D_TOP_MODULATE_COLOR_ADD_ALPHA: 6182 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s.w;\n", dstreg, dstmask, arg1, arg2, arg1); 6183 break; 6184 6185 case WINED3D_TOP_DOTPRODUCT3: 6186 mul = 4; 6187 shader_addline(buffer, "SUB arg1, %s, const.w;\n", arg1); 6188 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 6189 shader_addline(buffer, "DP3_SAT %s%s, arg1, arg2;\n", dstreg, dstmask); 6190 break; 6191 6192 case WINED3D_TOP_MULTIPLY_ADD: 6193 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg0); 6194 break; 6195 6196 case WINED3D_TOP_LERP: 6197 /* The msdn is not quite right here */ 6198 shader_addline(buffer, "LRP %s%s, %s, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6199 break; 6200 6201 case WINED3D_TOP_BUMPENVMAP: 6202 case WINED3D_TOP_BUMPENVMAP_LUMINANCE: 6203 /* Those are handled in the first pass of the shader(generation pass 1 and 2) already */ 6204 break; 6205 6206 default: 6207 FIXME("Unhandled texture op %08x\n", op); 6208 } 6209 6210 if (mul == 2) 6211 shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", dstreg, dstmask, dstreg); 6212 else if (mul == 4) 6213 shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", dstreg, dstmask, dstreg); 6214 } 6215 6216 
static const char *arbfp_texture_target(enum wined3d_gl_resource_type type) 6217 { 6218 switch(type) 6219 { 6220 case WINED3D_GL_RES_TYPE_TEX_1D: 6221 return "1D"; 6222 case WINED3D_GL_RES_TYPE_TEX_2D: 6223 return "2D"; 6224 case WINED3D_GL_RES_TYPE_TEX_3D: 6225 return "3D"; 6226 case WINED3D_GL_RES_TYPE_TEX_CUBE: 6227 return "CUBE"; 6228 case WINED3D_GL_RES_TYPE_TEX_RECT: 6229 return "RECT"; 6230 default: 6231 return "unexpected_resource_type"; 6232 } 6233 } 6234 6235 static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, const struct wined3d_gl_info *gl_info) 6236 { 6237 BYTE tex_read = 0, bump_used = 0, luminance_used = 0, constant_used = 0; 6238 BOOL tempreg_used = FALSE, tfactor_used = FALSE; 6239 unsigned int stage, lowest_disabled_stage; 6240 struct wined3d_string_buffer buffer; 6241 struct color_fixup_masks masks; 6242 BOOL custom_linear_fog = FALSE; 6243 const char *textype, *instr; 6244 DWORD arg0, arg1, arg2; 6245 char colorcor_dst[8]; 6246 BOOL op_equal; 6247 GLuint ret; 6248 6249 if (!string_buffer_init(&buffer)) 6250 { 6251 ERR("Failed to initialize shader buffer.\n"); 6252 return 0; 6253 } 6254 6255 shader_addline(&buffer, "!!ARBfp1.0\n"); 6256 6257 if (settings->color_key_enabled) 6258 { 6259 shader_addline(&buffer, "PARAM color_key_low = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_LOW); 6260 shader_addline(&buffer, "PARAM color_key_high = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_HIGH); 6261 tex_read |= 1; 6262 } 6263 6264 /* Find out which textures are read */ 6265 for (stage = 0; stage < MAX_TEXTURES; ++stage) 6266 { 6267 if (settings->op[stage].cop == WINED3D_TOP_DISABLE) 6268 break; 6269 6270 arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK; 6271 arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK; 6272 arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK; 6273 6274 if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE) 6275 tex_read |= 1u << stage; 6276 if 
(settings->op[stage].dst == tempreg) 6277 tempreg_used = TRUE; 6278 if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) 6279 tempreg_used = TRUE; 6280 if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) 6281 tfactor_used = TRUE; 6282 if (arg0 == WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT) 6283 constant_used |= 1u << stage; 6284 6285 switch (settings->op[stage].cop) 6286 { 6287 case WINED3D_TOP_BUMPENVMAP_LUMINANCE: 6288 luminance_used |= 1u << stage; 6289 /* fall through */ 6290 case WINED3D_TOP_BUMPENVMAP: 6291 bump_used |= 1u << stage; 6292 /* fall through */ 6293 case WINED3D_TOP_BLEND_TEXTURE_ALPHA: 6294 case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM: 6295 tex_read |= 1u << stage; 6296 break; 6297 6298 case WINED3D_TOP_BLEND_FACTOR_ALPHA: 6299 tfactor_used = TRUE; 6300 break; 6301 6302 default: 6303 break; 6304 } 6305 6306 if (settings->op[stage].aop == WINED3D_TOP_DISABLE) 6307 continue; 6308 6309 arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK; 6310 arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK; 6311 arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK; 6312 6313 if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE) 6314 tex_read |= 1u << stage; 6315 if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) 6316 tempreg_used = TRUE; 6317 if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) 6318 tfactor_used = TRUE; 6319 if (arg0 == WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT) 6320 constant_used |= 1u << stage; 6321 } 6322 lowest_disabled_stage = stage; 6323 6324 switch (settings->fog) 6325 { 6326 case WINED3D_FFP_PS_FOG_OFF: break; 6327 case WINED3D_FFP_PS_FOG_LINEAR: 6328 if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG) 6329 { 6330 custom_linear_fog = TRUE; 6331 break; 6332 } 6333 shader_addline(&buffer, "OPTION 
ARB_fog_linear;\n"); 6334 break; 6335 6336 case WINED3D_FFP_PS_FOG_EXP: shader_addline(&buffer, "OPTION ARB_fog_exp;\n"); break; 6337 case WINED3D_FFP_PS_FOG_EXP2: shader_addline(&buffer, "OPTION ARB_fog_exp2;\n"); break; 6338 default: FIXME("Unexpected fog setting %d\n", settings->fog); 6339 } 6340 6341 shader_addline(&buffer, "PARAM const = {1, 2, 4, 0.5};\n"); 6342 shader_addline(&buffer, "TEMP TMP;\n"); 6343 shader_addline(&buffer, "TEMP ret;\n"); 6344 if (tempreg_used || settings->sRGB_write) shader_addline(&buffer, "TEMP tempreg;\n"); 6345 shader_addline(&buffer, "TEMP arg0;\n"); 6346 shader_addline(&buffer, "TEMP arg1;\n"); 6347 shader_addline(&buffer, "TEMP arg2;\n"); 6348 for (stage = 0; stage < MAX_TEXTURES; ++stage) 6349 { 6350 if (constant_used & (1u << stage)) 6351 shader_addline(&buffer, "PARAM const%u = program.env[%u];\n", stage, ARB_FFP_CONST_CONSTANT(stage)); 6352 6353 if (!(tex_read & (1u << stage))) 6354 continue; 6355 6356 shader_addline(&buffer, "TEMP tex%u;\n", stage); 6357 6358 if (!(bump_used & (1u << stage))) 6359 continue; 6360 shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage)); 6361 6362 if (!(luminance_used & (1u << stage))) 6363 continue; 6364 shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage)); 6365 } 6366 if (tfactor_used) 6367 shader_addline(&buffer, "PARAM tfactor = program.env[%u];\n", ARB_FFP_CONST_TFACTOR); 6368 shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", ARB_FFP_CONST_SPECULAR_ENABLE); 6369 6370 if (settings->sRGB_write) 6371 { 6372 shader_addline(&buffer, "PARAM srgb_consts0 = "); 6373 shader_arb_append_imm_vec4(&buffer, wined3d_srgb_const0); 6374 shader_addline(&buffer, ";\n"); 6375 shader_addline(&buffer, "PARAM srgb_consts1 = "); 6376 shader_arb_append_imm_vec4(&buffer, wined3d_srgb_const1); 6377 shader_addline(&buffer, ";\n"); 6378 } 6379 6380 if (lowest_disabled_stage < 7 && 
settings->emul_clipplanes) 6381 shader_addline(&buffer, "KIL fragment.texcoord[7];\n"); 6382 6383 if (tempreg_used || settings->sRGB_write) 6384 shader_addline(&buffer, "MOV tempreg, 0.0;\n"); 6385 6386 /* Generate texture sampling instructions */ 6387 for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) 6388 { 6389 if (!(tex_read & (1u << stage))) 6390 continue; 6391 6392 textype = arbfp_texture_target(settings->op[stage].tex_type); 6393 6394 if(settings->op[stage].projected == proj_none) { 6395 instr = "TEX"; 6396 } else if(settings->op[stage].projected == proj_count4 || 6397 settings->op[stage].projected == proj_count3) { 6398 instr = "TXP"; 6399 } else { 6400 FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); 6401 instr = "TXP"; 6402 } 6403 6404 if (stage > 0 6405 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP 6406 || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) 6407 { 6408 shader_addline(&buffer, "SWZ arg1, bumpmat%u, x, z, 0, 0;\n", stage - 1); 6409 shader_addline(&buffer, "DP3 ret.x, arg1, tex%u;\n", stage - 1); 6410 shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1); 6411 shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1); 6412 6413 /* with projective textures, texbem only divides the static texture coord, not the displacement, 6414 * so multiply the displacement with the dividing parameter before passing it to TXP 6415 */ 6416 if (settings->op[stage].projected != proj_none) { 6417 if(settings->op[stage].projected == proj_count4) { 6418 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage); 6419 shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", stage, stage); 6420 } else { 6421 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage); 6422 shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", stage, stage); 
6423 } 6424 } else { 6425 shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage); 6426 } 6427 6428 shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n", 6429 instr, stage, stage, textype); 6430 if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) 6431 { 6432 shader_addline(&buffer, "MAD_SAT ret.x, tex%u.z, luminance%u.x, luminance%u.y;\n", 6433 stage - 1, stage - 1, stage - 1); 6434 shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage); 6435 } 6436 } else if(settings->op[stage].projected == proj_count3) { 6437 shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage); 6438 shader_addline(&buffer, "MOV ret.w, ret.z;\n"); 6439 shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n", 6440 instr, stage, stage, textype); 6441 } else { 6442 shader_addline(&buffer, "%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n", 6443 instr, stage, stage, stage, textype); 6444 } 6445 6446 sprintf(colorcor_dst, "tex%u", stage); 6447 masks = calc_color_correction(settings->op[stage].color_fixup, WINED3DSP_WRITEMASK_ALL); 6448 gen_color_correction(&buffer, colorcor_dst, colorcor_dst, "const.x", "const.y", 6449 settings->op[stage].color_fixup, masks); 6450 } 6451 6452 if (settings->color_key_enabled) 6453 { 6454 shader_addline(&buffer, "SLT TMP, tex0, color_key_low;\n"); /* below low key */ 6455 shader_addline(&buffer, "SGE ret, tex0, color_key_high;\n"); /* above high key */ 6456 shader_addline(&buffer, "ADD TMP, TMP, ret;\n"); /* or */ 6457 shader_addline(&buffer, "DP4 TMP.b, TMP, TMP;\n"); /* on any channel */ 6458 shader_addline(&buffer, "SGE TMP, -TMP.b, 0.0;\n"); /* logical not */ 6459 shader_addline(&buffer, "KIL -TMP;\n"); /* discard if true */ 6460 } 6461 6462 shader_addline(&buffer, "MOV ret, fragment.color.primary;\n"); 6463 6464 /* Generate the main shader */ 6465 for (stage = 0; stage < MAX_TEXTURES; ++stage) 6466 { 6467 if (settings->op[stage].cop == WINED3D_TOP_DISABLE) 6468 break; 6469 6470 if 
(settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1 6471 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1) 6472 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1; 6473 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1 6474 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2) 6475 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2; 6476 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2 6477 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1) 6478 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1; 6479 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2 6480 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2) 6481 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2; 6482 else 6483 op_equal = settings->op[stage].aop == settings->op[stage].cop 6484 && settings->op[stage].carg0 == settings->op[stage].aarg0 6485 && settings->op[stage].carg1 == settings->op[stage].aarg1 6486 && settings->op[stage].carg2 == settings->op[stage].aarg2; 6487 6488 if (settings->op[stage].aop == WINED3D_TOP_DISABLE) 6489 { 6490 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst, 6491 settings->op[stage].cop, settings->op[stage].carg0, 6492 settings->op[stage].carg1, settings->op[stage].carg2); 6493 } 6494 else if (op_equal) 6495 { 6496 gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].dst, 6497 settings->op[stage].cop, settings->op[stage].carg0, 6498 settings->op[stage].carg1, settings->op[stage].carg2); 6499 } 6500 else if (settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP 6501 && settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP_LUMINANCE) 6502 { 6503 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst, 6504 settings->op[stage].cop, settings->op[stage].carg0, 6505 settings->op[stage].carg1, settings->op[stage].carg2); 6506 gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].dst, 6507 settings->op[stage].aop, settings->op[stage].aarg0, 6508 
settings->op[stage].aarg1, settings->op[stage].aarg2); 6509 } 6510 } 6511 6512 if (settings->sRGB_write || custom_linear_fog) 6513 { 6514 shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, ret;\n"); 6515 if (settings->sRGB_write) 6516 arbfp_add_sRGB_correction(&buffer, "ret", "arg0", "arg1", "arg2", "tempreg", FALSE); 6517 if (custom_linear_fog) 6518 arbfp_add_linear_fog(&buffer, "ret", "arg0"); 6519 shader_addline(&buffer, "MOV result.color, ret;\n"); 6520 } 6521 else 6522 { 6523 shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, ret;\n"); 6524 } 6525 6526 /* Footer */ 6527 shader_addline(&buffer, "END\n"); 6528 6529 /* Generate the shader */ 6530 GL_EXTCALL(glGenProgramsARB(1, &ret)); 6531 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ret)); 6532 shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer); 6533 6534 string_buffer_free(&buffer); 6535 return ret; 6536 } 6537 6538 static void fragment_prog_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 6539 { 6540 const struct wined3d_gl_info *gl_info = context->gl_info; 6541 const struct wined3d_device *device = context->device; 6542 struct shader_arb_priv *priv = device->fragment_priv; 6543 BOOL use_pshader = use_ps(state); 6544 struct ffp_frag_settings settings; 6545 const struct arbfp_ffp_desc *desc; 6546 unsigned int i; 6547 6548 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 6549 6550 if (isStateDirty(context, STATE_RENDER(WINED3D_RS_FOGENABLE))) 6551 { 6552 if (!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) 6553 { 6554 /* Reload fixed function constants since they collide with the 6555 * pixel shader constants. 
*/ 6556 for (i = 0; i < MAX_TEXTURES; ++i) 6557 { 6558 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00)); 6559 state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT)); 6560 } 6561 state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR)); 6562 state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE)); 6563 color_key_arbfp(context, state, STATE_COLOR_KEY); 6564 } 6565 else if (use_pshader) 6566 { 6567 context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL; 6568 } 6569 return; 6570 } 6571 6572 if (!use_pshader) 6573 { 6574 /* Find or create a shader implementing the fixed function pipeline 6575 * settings, then activate it. */ 6576 gen_ffp_frag_op(context, state, &settings, FALSE); 6577 desc = (const struct arbfp_ffp_desc *)find_ffp_frag_shader(&priv->fragment_shaders, &settings); 6578 if (!desc) 6579 { 6580 struct arbfp_ffp_desc *new_desc; 6581 6582 if (!(new_desc = heap_alloc(sizeof(*new_desc)))) 6583 { 6584 ERR("Out of memory\n"); 6585 return; 6586 } 6587 6588 new_desc->parent.settings = settings; 6589 new_desc->shader = gen_arbfp_ffp_shader(&settings, gl_info); 6590 add_ffp_frag_shader(&priv->fragment_shaders, &new_desc->parent); 6591 TRACE("Allocated fixed function replacement shader descriptor %p\n", new_desc); 6592 desc = new_desc; 6593 } 6594 6595 /* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active (however, note the 6596 * comment above the shader_select call below). If e.g. GLSL is active, the shader_select call will 6597 * deactivate it. 
6598 */ 6599 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)); 6600 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)"); 6601 priv->current_fprogram_id = desc->shader; 6602 6603 if (device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) 6604 { 6605 /* Reload fixed function constants since they collide with the 6606 * pixel shader constants. */ 6607 for (i = 0; i < MAX_TEXTURES; ++i) 6608 { 6609 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00)); 6610 state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT)); 6611 } 6612 state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR)); 6613 state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE)); 6614 color_key_arbfp(context, state, STATE_COLOR_KEY); 6615 } 6616 context->last_was_pshader = FALSE; 6617 } 6618 else if (!context->last_was_pshader) 6619 { 6620 if (device->shader_backend == &arb_program_shader_backend) 6621 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F; 6622 context->last_was_pshader = TRUE; 6623 } 6624 6625 context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL; 6626 } 6627 6628 /* We can't link the fog states to the fragment state directly since the 6629 * vertex pipeline links them to FOGENABLE. A different linking in different 6630 * pipeline parts can't be expressed in the combined state table, so we need 6631 * to handle that with a forwarding function. The other invisible side effect 6632 * is that changing the fog start and fog end (which links to FOGENABLE in 6633 * vertex) results in the fragment_prog_arbfp function being called because 6634 * FOGENABLE is dirty, which calls this function here. 
*/ 6635 static void state_arbfp_fog(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 6636 { 6637 enum fogsource new_source; 6638 DWORD fogstart = state->render_states[WINED3D_RS_FOGSTART]; 6639 DWORD fogend = state->render_states[WINED3D_RS_FOGEND]; 6640 6641 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 6642 6643 if (!isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL))) 6644 fragment_prog_arbfp(context, state, state_id); 6645 6646 if (!state->render_states[WINED3D_RS_FOGENABLE]) 6647 return; 6648 6649 if (state->render_states[WINED3D_RS_FOGTABLEMODE] == WINED3D_FOG_NONE) 6650 { 6651 if (use_vs(state)) 6652 { 6653 new_source = FOGSOURCE_VS; 6654 } 6655 else 6656 { 6657 if (state->render_states[WINED3D_RS_FOGVERTEXMODE] == WINED3D_FOG_NONE || context->last_was_rhw) 6658 new_source = FOGSOURCE_COORD; 6659 else 6660 new_source = FOGSOURCE_FFP; 6661 } 6662 } 6663 else 6664 { 6665 new_source = FOGSOURCE_FFP; 6666 } 6667 6668 if (new_source != context->fog_source || fogstart == fogend) 6669 { 6670 context->fog_source = new_source; 6671 state_fogstartend(context, state, STATE_RENDER(WINED3D_RS_FOGSTART)); 6672 } 6673 } 6674 6675 static void textransform(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 6676 { 6677 if (!isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL))) 6678 fragment_prog_arbfp(context, state, state_id); 6679 } 6680 6681 static const struct StateEntryTemplate arbfp_fragmentstate_template[] = 6682 { 6683 {STATE_RENDER(WINED3D_RS_TEXTUREFACTOR), { STATE_RENDER(WINED3D_RS_TEXTUREFACTOR), state_texfactor_arbfp }, WINED3D_GL_EXT_NONE }, 6684 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6685 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6686 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG2), { 
STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6687 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6688 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6689 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6690 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6691 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6692 {STATE_TEXTURESTAGE(0, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6693 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6694 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6695 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6696 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6697 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6698 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6699 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6700 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6701 {STATE_TEXTURESTAGE(1, 
WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6702 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6703 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6704 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6705 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6706 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6707 {STATE_TEXTURESTAGE(1, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6708 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6709 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6710 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6711 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6712 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6713 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6714 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6715 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6716 
{STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6717 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6718 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6719 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6720 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6721 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6722 {STATE_TEXTURESTAGE(2, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6723 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6724 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6725 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6726 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6727 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6728 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6729 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6730 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE 
}, 6731 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6732 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6733 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6734 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6735 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6736 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6737 {STATE_TEXTURESTAGE(3, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6738 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6739 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6740 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6741 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6742 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6743 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6744 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6745 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, 
WINED3D_GL_EXT_NONE }, 6746 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6747 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6748 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6749 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6750 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6751 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6752 {STATE_TEXTURESTAGE(4, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6753 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6754 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6755 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6756 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6757 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6758 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6759 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6760 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), 
NULL }, WINED3D_GL_EXT_NONE }, 6761 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6762 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6763 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6764 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6765 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6766 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6767 {STATE_TEXTURESTAGE(5, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6768 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6769 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6770 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6771 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6772 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6773 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6774 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6775 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG1), { 
STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6776 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6777 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6778 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6779 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6780 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6781 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6782 {STATE_TEXTURESTAGE(6, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6783 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6784 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6785 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6786 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6787 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6788 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6789 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6790 {STATE_TEXTURESTAGE(7, 
WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6791 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6792 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6793 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6794 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6795 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6796 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6797 {STATE_TEXTURESTAGE(7, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6798 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6799 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6800 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6801 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6802 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6803 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6804 {STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), fragment_prog_arbfp }, WINED3D_GL_EXT_NONE }, 6805 
{STATE_RENDER(WINED3D_RS_ALPHAFUNC), { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6806 {STATE_RENDER(WINED3D_RS_ALPHAREF), { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6807 {STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), alpha_test_arbfp }, WINED3D_GL_EXT_NONE }, 6808 {STATE_RENDER(WINED3D_RS_COLORKEYENABLE), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6809 {STATE_COLOR_KEY, { STATE_COLOR_KEY, color_key_arbfp }, WINED3D_GL_EXT_NONE }, 6810 {STATE_RENDER(WINED3D_RS_FOGENABLE), { STATE_RENDER(WINED3D_RS_FOGENABLE), state_arbfp_fog }, WINED3D_GL_EXT_NONE }, 6811 {STATE_RENDER(WINED3D_RS_FOGTABLEMODE), { STATE_RENDER(WINED3D_RS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6812 {STATE_RENDER(WINED3D_RS_FOGVERTEXMODE), { STATE_RENDER(WINED3D_RS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6813 {STATE_RENDER(WINED3D_RS_FOGSTART), { STATE_RENDER(WINED3D_RS_FOGSTART), state_fogstartend }, WINED3D_GL_EXT_NONE }, 6814 {STATE_RENDER(WINED3D_RS_FOGEND), { STATE_RENDER(WINED3D_RS_FOGSTART), NULL }, WINED3D_GL_EXT_NONE }, 6815 {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), { STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), state_srgbwrite }, ARB_FRAMEBUFFER_SRGB }, 6816 {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6817 {STATE_RENDER(WINED3D_RS_FOGCOLOR), { STATE_RENDER(WINED3D_RS_FOGCOLOR), state_fogcolor }, WINED3D_GL_EXT_NONE }, 6818 {STATE_RENDER(WINED3D_RS_FOGDENSITY), { STATE_RENDER(WINED3D_RS_FOGDENSITY), state_fogdensity }, WINED3D_GL_EXT_NONE }, 6819 {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6820 {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6821 
{STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6822 {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6823 {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6824 {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6825 {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6826 {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6827 {STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6828 {STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6829 {STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6830 {STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6831 {STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6832 {STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6833 {STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, 
WINED3D_GL_EXT_NONE }, 6834 {STATE_TEXTURESTAGE(7, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(7, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6835 {STATE_RENDER(WINED3D_RS_SPECULARENABLE), { STATE_RENDER(WINED3D_RS_SPECULARENABLE), state_arb_specularenable}, WINED3D_GL_EXT_NONE }, 6836 {STATE_RENDER(WINED3D_RS_SHADEMODE), { STATE_RENDER(WINED3D_RS_SHADEMODE), state_shademode }, WINED3D_GL_EXT_NONE }, 6837 {0 /* Terminate */, { 0, 0 }, WINED3D_GL_EXT_NONE }, 6838 }; 6839 6840 static BOOL arbfp_alloc_context_data(struct wined3d_context *context) 6841 { 6842 return TRUE; 6843 } 6844 6845 static void arbfp_free_context_data(struct wined3d_context *context) 6846 { 6847 } 6848 6849 const struct fragment_pipeline arbfp_fragment_pipeline = { 6850 arbfp_enable, 6851 arbfp_get_caps, 6852 arbfp_get_emul_mask, 6853 arbfp_alloc, 6854 arbfp_free, 6855 arbfp_alloc_context_data, 6856 arbfp_free_context_data, 6857 shader_arb_color_fixup_supported, 6858 arbfp_fragmentstate_template, 6859 }; 6860 6861 struct arbfp_blit_type 6862 { 6863 enum complex_fixup fixup : 4; 6864 enum wined3d_gl_resource_type res_type : 3; 6865 DWORD use_color_key : 1; 6866 DWORD padding : 24; 6867 }; 6868 6869 struct arbfp_blit_desc 6870 { 6871 GLuint shader; 6872 struct arbfp_blit_type type; 6873 struct wine_rb_entry entry; 6874 }; 6875 6876 #define ARBFP_BLIT_PARAM_SIZE 0 6877 #define ARBFP_BLIT_PARAM_COLOR_KEY_LOW 1 6878 #define ARBFP_BLIT_PARAM_COLOR_KEY_HIGH 2 6879 6880 struct wined3d_arbfp_blitter 6881 { 6882 struct wined3d_blitter blitter; 6883 struct wine_rb_tree shaders; 6884 GLuint palette_texture; 6885 }; 6886 6887 static int arbfp_blit_type_compare(const void *key, const struct wine_rb_entry *entry) 6888 { 6889 const struct arbfp_blit_type *ka = key; 6890 const struct arbfp_blit_type *kb = &WINE_RB_ENTRY_VALUE(entry, const struct arbfp_blit_desc, entry)->type; 6891 6892 return memcmp(ka, kb, sizeof(*ka)); 6893 } 6894 6895 /* Context activation is done by the caller. 
*/ 6896 static void arbfp_free_blit_shader(struct wine_rb_entry *entry, void *ctx) 6897 { 6898 struct arbfp_blit_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry); 6899 const struct wined3d_gl_info *gl_info; 6900 struct wined3d_context *context; 6901 6902 context = ctx; 6903 gl_info = context->gl_info; 6904 6905 GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader)); 6906 checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)"); 6907 heap_free(entry_arb); 6908 } 6909 6910 /* Context activation is done by the caller. */ 6911 static void arbfp_blitter_destroy(struct wined3d_blitter *blitter, struct wined3d_context *context) 6912 { 6913 const struct wined3d_gl_info *gl_info = context->gl_info; 6914 struct wined3d_arbfp_blitter *arbfp_blitter; 6915 struct wined3d_blitter *next; 6916 6917 if ((next = blitter->next)) 6918 next->ops->blitter_destroy(next, context); 6919 6920 arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter); 6921 6922 wine_rb_destroy(&arbfp_blitter->shaders, arbfp_free_blit_shader, context); 6923 checkGLcall("Delete blit programs"); 6924 6925 if (arbfp_blitter->palette_texture) 6926 gl_info->gl_ops.gl.p_glDeleteTextures(1, &arbfp_blitter->palette_texture); 6927 6928 heap_free(arbfp_blitter); 6929 } 6930 6931 static BOOL gen_planar_yuv_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type, 6932 char *luminance) 6933 { 6934 char chroma; 6935 const char *tex, *texinstr = "TXP"; 6936 6937 if (type->fixup == COMPLEX_FIXUP_UYVY) 6938 { 6939 chroma = 'x'; 6940 *luminance = 'w'; 6941 } 6942 else 6943 { 6944 chroma = 'w'; 6945 *luminance = 'x'; 6946 } 6947 6948 tex = arbfp_texture_target(type->res_type); 6949 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_RECT) 6950 texinstr = "TEX"; 6951 6952 /* First we have to read the chroma values. This means we need at least two pixels(no filtering), 6953 * or 4 pixels(with filtering). 
To get the unmodified chromas, we have to rid ourselves of the 6954 * filtering when we sample the texture. 6955 * 6956 * These are the rules for reading the chroma: 6957 * 6958 * Even pixel: Cr 6959 * Even pixel: U 6960 * Odd pixel: V 6961 * 6962 * So we have to get the sampling x position in non-normalized coordinates in integers 6963 */ 6964 if (type->res_type != WINED3D_GL_RES_TYPE_TEX_RECT) 6965 { 6966 shader_addline(buffer, "MUL texcrd.xy, fragment.texcoord[0], size.x;\n"); 6967 shader_addline(buffer, "MOV texcrd.w, size.x;\n"); 6968 } 6969 else 6970 { 6971 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 6972 } 6973 /* We must not allow filtering between pixel x and x+1, this would mix U and V 6974 * Vertical filtering is ok. However, bear in mind that the pixel center is at 6975 * 0.5, so add 0.5. 6976 */ 6977 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 6978 shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n"); 6979 6980 /* Divide the x coordinate by 0.5 and get the fraction. This gives 0.25 and 0.75 for the 6981 * even and odd pixels respectively 6982 */ 6983 shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n"); 6984 shader_addline(buffer, "FRC texcrd2, texcrd2;\n"); 6985 6986 /* Sample Pixel 1 */ 6987 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 6988 6989 /* Put the value into either of the chroma values */ 6990 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 6991 shader_addline(buffer, "MUL chroma.x, luminance.%c, temp.x;\n", chroma); 6992 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 6993 shader_addline(buffer, "MUL chroma.y, luminance.%c, temp.x;\n", chroma); 6994 6995 /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample 6996 * the pixel right to the current one. Otherwise, sample the left pixel. 6997 * Bias and scale the SLT result to -1;1 and add it to the texcrd.x. 
6998 */ 6999 shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n"); 7000 shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n"); 7001 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 7002 7003 /* Put the value into the other chroma */ 7004 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 7005 shader_addline(buffer, "MAD chroma.y, luminance.%c, temp.x, chroma.y;\n", chroma); 7006 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 7007 shader_addline(buffer, "MAD chroma.x, luminance.%c, temp.x, chroma.x;\n", chroma); 7008 7009 /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of 7010 * the current one and lerp the two U and V values 7011 */ 7012 7013 /* This gives the correctly filtered luminance value */ 7014 shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex); 7015 7016 return TRUE; 7017 } 7018 7019 static BOOL gen_yv12_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type, 7020 char *luminance) 7021 { 7022 const char *tex; 7023 static const float yv12_coef[] 7024 = {2.0f / 3.0f, 1.0f / 6.0f, (2.0f / 3.0f) + (1.0f / 6.0f), 1.0f / 3.0f}; 7025 7026 tex = arbfp_texture_target(type->res_type); 7027 7028 /* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2) 7029 * V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective 7030 * bitdepth is 12 bits per pixel. Since the U and V planes have only half the 7031 * pitch of the luminance plane, the packing into the gl texture is a bit 7032 * unfortunate. 
If the whole texture is interpreted as luminance data it looks 7033 * approximately like this: 7034 * 7035 * +----------------------------------+---- 7036 * | | 7037 * | | 7038 * | | 7039 * | | 7040 * | | 2 7041 * | LUMINANCE | - 7042 * | | 3 7043 * | | 7044 * | | 7045 * | | 7046 * | | 7047 * +----------------+-----------------+---- 7048 * | | | 7049 * | V even rows | V odd rows | 7050 * | | | 1 7051 * +----------------+------------------ - 7052 * | | | 3 7053 * | U even rows | U odd rows | 7054 * | | | 7055 * +----------------+-----------------+---- 7056 * | | | 7057 * | 0.5 | 0.5 | 7058 * 7059 * So it appears as if there are 4 chroma images, but in fact the odd rows 7060 * in the chroma images are in the same row as the even ones. So it is 7061 * kinda tricky to read 7062 * 7063 * When reading from rectangle textures, keep in mind that the input y coordinates 7064 * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height 7065 */ 7066 shader_addline(buffer, "PARAM yv12_coef = "); 7067 shader_arb_append_imm_vec4(buffer, yv12_coef); 7068 shader_addline(buffer, ";\n"); 7069 7070 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7071 /* the chroma planes have only half the width */ 7072 shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n"); 7073 7074 /* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias 7075 * the coordinate. 
Also read the right side of the image when reading odd lines 7076 * 7077 * Don't forget to clamp the y values in into the range, otherwise we'll get filtering 7078 * bleeding 7079 */ 7080 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7081 { 7082 7083 shader_addline(buffer, "RCP chroma.w, size.y;\n"); 7084 7085 shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n"); 7086 7087 shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n"); 7088 shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n"); 7089 7090 /* Read odd lines from the right side(add size * 0.5 to the x coordinate */ 7091 shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 7092 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 7093 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 7094 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 7095 7096 /* clamp, keep the half pixel origin in mind */ 7097 shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, yv12_coef.x;\n"); 7098 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7099 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.z;\n"); 7100 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7101 } 7102 else 7103 { 7104 /* Read from [size - size+size/4] */ 7105 shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n"); 7106 shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n"); 7107 7108 /* Read odd lines from the right side(add size * 0.5 to the x coordinate */ 7109 shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 7110 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 7111 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 7112 shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n"); 7113 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 7114 7115 /* Make sure to read exactly from the pixel 
center */ 7116 shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n"); 7117 shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n"); 7118 7119 /* Clamp */ 7120 shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n"); 7121 shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n"); 7122 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7123 shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n"); 7124 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7125 } 7126 /* Read the texture, put the result into the output register */ 7127 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7128 shader_addline(buffer, "MOV chroma.x, temp.w;\n"); 7129 7130 /* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th 7131 * No need to clamp because we're just reusing the already clamped value from above 7132 */ 7133 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7134 shader_addline(buffer, "ADD texcrd.y, texcrd.y, yv12_coef.y;\n"); 7135 else 7136 shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, texcrd.y;\n"); 7137 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7138 shader_addline(buffer, "MOV chroma.y, temp.w;\n"); 7139 7140 /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate. 
7141 * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance 7142 * values due to filtering 7143 */ 7144 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7145 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7146 { 7147 /* Multiply the y coordinate by 2/3 and clamp it */ 7148 shader_addline(buffer, "MUL texcrd.y, texcrd.y, yv12_coef.x;\n"); 7149 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.x;\n"); 7150 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7151 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7152 } 7153 else 7154 { 7155 /* Reading from texture_rectangles is pretty straightforward, just use the unmodified 7156 * texture coordinate. It is still a good idea to clamp it though, since the opengl texture 7157 * is bigger 7158 */ 7159 shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n"); 7160 shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n"); 7161 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7162 } 7163 *luminance = 'a'; 7164 7165 return TRUE; 7166 } 7167 7168 static BOOL gen_nv12_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type, 7169 char *luminance) 7170 { 7171 const char *tex; 7172 static const float nv12_coef[] 7173 = {2.0f / 3.0f, 1.0f / 3.0f, 1.0f, 1.0f}; 7174 7175 tex = arbfp_texture_target(type->res_type); 7176 7177 /* NV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2) 7178 * sized plane where each component is an UV pair. 
So the effective 7179 * bitdepth is 12 bits per pixel If the whole texture is interpreted as luminance 7180 * data it looks approximately like this: 7181 * 7182 * +----------------------------------+---- 7183 * | | 7184 * | | 7185 * | | 7186 * | | 7187 * | | 2 7188 * | LUMINANCE | - 7189 * | | 3 7190 * | | 7191 * | | 7192 * | | 7193 * | | 7194 * +----------------------------------+---- 7195 * |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV| 7196 * |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV| 7197 * | | 1 7198 * | | - 7199 * | | 3 7200 * | | 7201 * | | 7202 * +----------------------------------+---- 7203 * 7204 * When reading from rectangle textures, keep in mind that the input y coordinates 7205 * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height. */ 7206 7207 shader_addline(buffer, "PARAM nv12_coef = "); 7208 shader_arb_append_imm_vec4(buffer, nv12_coef); 7209 shader_addline(buffer, ";\n"); 7210 7211 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7212 /* We only have half the number of chroma pixels. */ 7213 shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n"); 7214 7215 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7216 { 7217 shader_addline(buffer, "RCP chroma.w, size.x;\n"); 7218 shader_addline(buffer, "RCP chroma.z, size.y;\n"); 7219 7220 shader_addline(buffer, "MAD texcrd.y, texcrd.y, nv12_coef.y, nv12_coef.x;\n"); 7221 7222 /* We must not allow filtering horizontally, this would mix U and V. 7223 * Vertical filtering is ok. However, bear in mind that the pixel center is at 7224 * 0.5, so add 0.5. */ 7225 7226 /* Convert to non-normalized coordinates so we can find the 7227 * individual pixel. */ 7228 shader_addline(buffer, "MUL texcrd.x, texcrd.x, size.x;\n"); 7229 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 7230 /* Multiply by 2 since chroma components are stored in UV pixel pairs, 7231 * add 0.5 to hit the center of the pixel. 
*/ 7232 shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n"); 7233 7234 /* Convert back to normalized coordinates. */ 7235 shader_addline(buffer, "MUL texcrd.x, texcrd.x, chroma.w;\n"); 7236 7237 /* Clamp, keep the half pixel origin in mind. */ 7238 shader_addline(buffer, "MAD temp.y, coef.y, chroma.z, nv12_coef.x;\n"); 7239 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7240 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.z, nv12_coef.z;\n"); 7241 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7242 } 7243 else 7244 { 7245 /* Read from [size - size+size/2] */ 7246 shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.y, size.y;\n"); 7247 7248 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 7249 /* Multiply by 2 since chroma components are stored in UV pixel pairs, 7250 * add 0.5 to hit the center of the pixel. */ 7251 shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n"); 7252 7253 /* Clamp */ 7254 shader_addline(buffer, "MAD temp.y, size.y, coef.y, size.y;\n"); 7255 shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n"); 7256 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7257 shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n"); 7258 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7259 } 7260 /* Read the texture, put the result into the output register. */ 7261 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7262 shader_addline(buffer, "MOV chroma.y, temp.w;\n"); 7263 7264 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7265 { 7266 /* Add 1/size.x */ 7267 shader_addline(buffer, "ADD texcrd.x, texcrd.x, chroma.w;\n"); 7268 } 7269 else 7270 { 7271 /* Add 1 */ 7272 shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.x;\n"); 7273 } 7274 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7275 shader_addline(buffer, "MOV chroma.x, temp.w;\n"); 7276 7277 /* Sample the luminance value. 
It is in the top 2/3rd of the texture, so scale the y coordinate. 7278 * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance 7279 * values due to filtering. */ 7280 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7281 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7282 { 7283 /* Multiply the y coordinate by 2/3 and clamp it */ 7284 shader_addline(buffer, "MUL texcrd.y, texcrd.y, nv12_coef.x;\n"); 7285 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, nv12_coef.x;\n"); 7286 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7287 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7288 } 7289 else 7290 { 7291 /* Reading from texture_rectangles is pretty straightforward, just use the unmodified 7292 * texture coordinate. It is still a good idea to clamp it though, since the opengl texture 7293 * is bigger 7294 */ 7295 shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n"); 7296 shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n"); 7297 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7298 } 7299 *luminance = 'a'; 7300 7301 return TRUE; 7302 } 7303 7304 /* Context activation is done by the caller. */ 7305 static GLuint gen_p8_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type) 7306 { 7307 GLuint shader; 7308 struct wined3d_string_buffer buffer; 7309 const char *tex_target = arbfp_texture_target(type->res_type); 7310 7311 /* This should not happen because we only use this conversion for 7312 * present blits which don't use color keying. 
*/ 7313 if (type->use_color_key) 7314 FIXME("Implement P8 color keying.\n"); 7315 7316 /* Shader header */ 7317 if (!string_buffer_init(&buffer)) 7318 { 7319 ERR("Failed to initialize shader buffer.\n"); 7320 return 0; 7321 } 7322 7323 GL_EXTCALL(glGenProgramsARB(1, &shader)); 7324 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 7325 if (!shader) 7326 { 7327 string_buffer_free(&buffer); 7328 return 0; 7329 } 7330 7331 shader_addline(&buffer, "!!ARBfp1.0\n"); 7332 shader_addline(&buffer, "TEMP index;\n"); 7333 7334 /* { 255/256, 0.5/255*255/256, 0, 0 } */ 7335 shader_addline(&buffer, "PARAM constants = { 0.996, 0.00195, 0, 0 };\n"); 7336 7337 /* The alpha-component contains the palette index */ 7338 shader_addline(&buffer, "TEX index, fragment.texcoord[0], texture[0], %s;\n", tex_target); 7339 7340 /* Scale the index by 255/256 and add a bias of '0.5' in order to sample in the middle */ 7341 shader_addline(&buffer, "MAD index.a, index.a, constants.x, constants.y;\n"); 7342 7343 /* Use the alpha-component as an index in the palette to get the final color */ 7344 shader_addline(&buffer, "TEX result.color, index.a, texture[1], 1D;\n"); 7345 shader_addline(&buffer, "END\n"); 7346 7347 shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer); 7348 7349 string_buffer_free(&buffer); 7350 7351 return shader; 7352 } 7353 7354 /* Context activation is done by the caller. */ 7355 static void upload_palette(struct wined3d_arbfp_blitter *blitter, 7356 const struct wined3d_texture *texture, struct wined3d_context *context) 7357 { 7358 const struct wined3d_palette *palette = texture->swapchain ? 
texture->swapchain->palette : NULL; 7359 const struct wined3d_gl_info *gl_info = context->gl_info; 7360 7361 if (!blitter->palette_texture) 7362 gl_info->gl_ops.gl.p_glGenTextures(1, &blitter->palette_texture); 7363 7364 GL_EXTCALL(glActiveTexture(GL_TEXTURE1)); 7365 gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_1D, blitter->palette_texture); 7366 7367 gl_info->gl_ops.gl.p_glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); 7368 7369 gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 7370 /* Make sure we have discrete color levels. */ 7371 gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 7372 gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 7373 /* TODO: avoid unneeded uploads in the future by adding some SFLAG_PALETTE_DIRTY mechanism */ 7374 if (palette) 7375 { 7376 gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 256, 0, GL_BGRA, 7377 GL_UNSIGNED_INT_8_8_8_8_REV, palette->colors); 7378 } 7379 else 7380 { 7381 static const DWORD black; 7382 FIXME("P8 surface loaded without a palette.\n"); 7383 gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 1, 0, GL_BGRA, 7384 GL_UNSIGNED_INT_8_8_8_8_REV, &black); 7385 } 7386 7387 /* Switch back to unit 0 in which the 2D texture will be stored. */ 7388 context_active_texture(context, gl_info, 0); 7389 } 7390 7391 /* Context activation is done by the caller. 
*/
/* Generate and compile the ARB fragment program that converts a YUV source
 * texture to RGB while blitting. The source layout is selected by
 * type->fixup (UYVY, YUY2, YV12 or NV12).
 *
 * Returns the GL program name on success; the program is left bound to
 * GL_FRAGMENT_PROGRAM_ARB. Returns 0 on failure, in which case the program
 * object that was generated (if any) is deleted again. */
static GLuint gen_yuv_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
{
    /* Initialised so that a failing glGenProgramsARB() is reliably detected. */
    GLuint shader = 0;
    struct wined3d_string_buffer buffer;
    char luminance_component;

    if (type->use_color_key)
        FIXME("Implement YUV color keying.\n");

    /* Shader header */
    if (!string_buffer_init(&buffer))
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

    GL_EXTCALL(glGenProgramsARB(1, &shader));
    checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
    if (!shader)
    {
        string_buffer_free(&buffer);
        return 0;
    }

    /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel,
     * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and
     * two chroma(U and V) values. Each macropixel has two luminance values, one for
     * each single pixel it contains, and one U and one V value shared between both
     * pixels.
     *
     * The data is loaded into an A8L8 texture. With YUY2, the luminance component
     * contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus
     * take the format into account when generating the read swizzles.
     *
     * Reading the Y value is straightforward - just sample the texture. The hardware
     * takes care of filtering in the horizontal and vertical direction.
     *
     * Reading the U and V values is harder. We have to avoid filtering horizontally,
     * because that would mix the U and V values of one pixel or two adjacent pixels.
     * Thus floor the texture coordinate and add 0.5 to get an unfiltered read,
     * regardless of the filtering setting. Vertical filtering works automatically
     * though - the U and V values of two rows are mixed nicely.
     *
     * Apart from avoiding filtering issues, the code has to know which value it just
     * read, and where it can find the other one. To determine this, it checks if
     * it sampled an even or odd pixel, and shifts the 2nd read accordingly.
     *
     * Handling horizontal filtering of U and V values requires reading a 2nd pair
     * of pixels, extracting U and V and mixing them. This is not implemented yet.
     *
     * An alternative implementation idea is to load the texture as A8R8G8B8 texture,
     * with width / 2. This way one read gives all 3 values, finding U and V is easy
     * in an unfiltered situation. Finding the luminance on the other hand requires
     * finding out if it is an odd or even pixel. The real drawback of this approach
     * is filtering. This would have to be emulated completely in the shader, reading
     * up to 2 packed pixels in up to 2 rows and interpolating both horizontally and
     * vertically. Beyond that it would require adjustments to the texture handling
     * code to deal with the width scaling.
     */
    shader_addline(&buffer, "!!ARBfp1.0\n");
    shader_addline(&buffer, "TEMP luminance;\n");
    shader_addline(&buffer, "TEMP temp;\n");
    shader_addline(&buffer, "TEMP chroma;\n");
    shader_addline(&buffer, "TEMP texcrd;\n");
    shader_addline(&buffer, "TEMP texcrd2;\n");
    shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.25};\n");
    shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n");
    shader_addline(&buffer, "PARAM size = program.local[%u];\n", ARBFP_BLIT_PARAM_SIZE);

    /* Emit the format specific code that fills "luminance" and "chroma" and
     * reports which component of "luminance" holds the Y value. */
    switch (type->fixup)
    {
        case COMPLEX_FIXUP_UYVY:
        case COMPLEX_FIXUP_YUY2:
            if (!gen_planar_yuv_read(&buffer, type, &luminance_component))
                goto fail;
            break;

        case COMPLEX_FIXUP_YV12:
            if (!gen_yv12_read(&buffer, type, &luminance_component))
                goto fail;
            break;

        case COMPLEX_FIXUP_NV12:
            if (!gen_nv12_read(&buffer, type, &luminance_component))
                goto fail;
            break;

        default:
            FIXME("Unsupported YUV fixup %#x\n", type->fixup);
            goto fail;
    }

    /* Calculate the final result. Formula is taken from
     * http://www.fourcc.org/fccyvrgb.php. Note that the chroma
     * ranges from -0.5 to 0.5
     */
    shader_addline(&buffer, "SUB chroma.xy, chroma, coef.y;\n");

    shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "MAD temp.x, -chroma.y, yuv_coef.y, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "MAD result.color.y, -chroma.x, yuv_coef.z, temp.x;\n");
    shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "END\n");

    shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);

    string_buffer_free(&buffer);

    return shader;

fail:
    /* The program object was already generated and bound above; release it
     * and restore the default binding so that it is not leaked. */
    GL_EXTCALL(glDeleteProgramsARB(1, &shader));
    checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader))");
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0));
    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)");
    string_buffer_free(&buffer);
    return 0;
}

/* Context activation is done by the caller.
*/
/* Build the ARB fragment program used for blits whose source needs no
 * complex format conversion. Without colour keying the program is a single
 * texture fetch; with colour keying it additionally discards every fragment
 * whose colour lies inside the [color_key_low, color_key_high] range passed
 * through the program local parameters.
 *
 * Returns the GL program name (left bound to GL_FRAGMENT_PROGRAM_ARB), or 0
 * if the string buffer or the program object could not be created. */
static GLuint arbfp_gen_plain_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
{
    GLuint program;
    struct wined3d_string_buffer source;
    const char *sampler_target = arbfp_texture_target(type->res_type);

    if (!string_buffer_init(&source))
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

    GL_EXTCALL(glGenProgramsARB(1, &program));
    if (!program)
    {
        string_buffer_free(&source);
        return 0;
    }
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, program));

    /* Program header. */
    shader_addline(&source, "!!ARBfp1.0\n");

    if (!type->use_color_key)
    {
        /* Plain copy: sample straight into the output colour. */
        shader_addline(&source, "TEX result.color, fragment.texcoord[0], texture[0], %s;\n", sampler_target);
    }
    else
    {
        shader_addline(&source, "TEMP color;\n");
        shader_addline(&source, "TEMP less, greater;\n");
        shader_addline(&source, "PARAM color_key_low = program.local[%u];\n", ARBFP_BLIT_PARAM_COLOR_KEY_LOW);
        shader_addline(&source, "PARAM color_key_high = program.local[%u];\n", ARBFP_BLIT_PARAM_COLOR_KEY_HIGH);
        shader_addline(&source, "TEX color, fragment.texcoord[0], texture[0], %s;\n", sampler_target);
        /* Per channel: 1.0 where the colour is below the low key... */
        shader_addline(&source, "SLT less, color, color_key_low;\n");
        /* ...or at/above the high key. */
        shader_addline(&source, "SGE greater, color, color_key_high;\n");
        /* Combine both tests (logical or). */
        shader_addline(&source, "ADD less, less, greater;\n");
        /* Non-zero if any channel is outside the key range. */
        shader_addline(&source, "DP4 less.b, less, less;\n");
        /* Logical not: 1.0 only if every channel is inside the key range. */
        shader_addline(&source, "SGE less, -less.b, 0.0;\n");
        /* Discard the fragment if it matches the colour key. */
        shader_addline(&source, "KIL -less;\n");
        shader_addline(&source, "MOV result.color, color;\n");
    }

    shader_addline(&source, "END\n");

    shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, source.buffer);

    string_buffer_free(&source);

    return program;
}

/* Context
activation is done by the caller. */ 7569 static HRESULT arbfp_blit_set(struct wined3d_arbfp_blitter *blitter, struct wined3d_context *context, 7570 const struct wined3d_texture *texture, unsigned int sub_resource_idx, 7571 const struct wined3d_color_key *color_key) 7572 { 7573 enum complex_fixup fixup; 7574 const struct wined3d_gl_info *gl_info = context->gl_info; 7575 struct wine_rb_entry *entry; 7576 struct arbfp_blit_type type; 7577 struct arbfp_blit_desc *desc; 7578 struct wined3d_color float_color_key[2]; 7579 struct wined3d_vec4 size; 7580 unsigned int level; 7581 GLuint shader; 7582 7583 level = sub_resource_idx % texture->level_count; 7584 size.x = wined3d_texture_get_level_pow2_width(texture, level); 7585 size.y = wined3d_texture_get_level_pow2_height(texture, level); 7586 size.z = 1.0f; 7587 size.w = 1.0f; 7588 7589 if (is_complex_fixup(texture->resource.format->color_fixup)) 7590 fixup = get_complex_fixup(texture->resource.format->color_fixup); 7591 else 7592 fixup = COMPLEX_FIXUP_NONE; 7593 7594 switch (texture->target) 7595 { 7596 case GL_TEXTURE_1D: 7597 type.res_type = WINED3D_GL_RES_TYPE_TEX_1D; 7598 break; 7599 7600 case GL_TEXTURE_2D: 7601 type.res_type = WINED3D_GL_RES_TYPE_TEX_2D; 7602 break; 7603 7604 case GL_TEXTURE_3D: 7605 type.res_type = WINED3D_GL_RES_TYPE_TEX_3D; 7606 break; 7607 7608 case GL_TEXTURE_CUBE_MAP_ARB: 7609 type.res_type = WINED3D_GL_RES_TYPE_TEX_CUBE; 7610 break; 7611 7612 case GL_TEXTURE_RECTANGLE_ARB: 7613 type.res_type = WINED3D_GL_RES_TYPE_TEX_RECT; 7614 break; 7615 7616 default: 7617 ERR("Unexpected GL texture type %#x.\n", texture->target); 7618 type.res_type = WINED3D_GL_RES_TYPE_TEX_2D; 7619 } 7620 type.fixup = fixup; 7621 type.use_color_key = !!color_key; 7622 type.padding = 0; 7623 7624 if ((entry = wine_rb_get(&blitter->shaders, &type))) 7625 { 7626 desc = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry); 7627 shader = desc->shader; 7628 } 7629 else 7630 { 7631 switch (fixup) 7632 { 7633 case 
COMPLEX_FIXUP_NONE: 7634 if (!is_identity_fixup(texture->resource.format->color_fixup)) 7635 FIXME("Implement support for sign or swizzle fixups.\n"); 7636 shader = arbfp_gen_plain_shader(gl_info, &type); 7637 break; 7638 7639 case COMPLEX_FIXUP_P8: 7640 shader = gen_p8_shader(gl_info, &type); 7641 break; 7642 7643 case COMPLEX_FIXUP_YUY2: 7644 case COMPLEX_FIXUP_UYVY: 7645 case COMPLEX_FIXUP_YV12: 7646 case COMPLEX_FIXUP_NV12: 7647 shader = gen_yuv_shader(gl_info, &type); 7648 break; 7649 } 7650 7651 if (!shader) 7652 { 7653 FIXME("Unsupported complex fixup %#x, not setting a shader\n", fixup); 7654 return E_NOTIMPL; 7655 } 7656 7657 if (!(desc = heap_alloc(sizeof(*desc)))) 7658 goto err_out; 7659 7660 desc->type = type; 7661 desc->shader = shader; 7662 if (wine_rb_put(&blitter->shaders, &desc->type, &desc->entry) == -1) 7663 { 7664 err_out: 7665 ERR("Out of memory\n"); 7666 GL_EXTCALL(glDeleteProgramsARB(1, &shader)); 7667 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader))"); 7668 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)); 7669 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)"); 7670 heap_free(desc); 7671 return E_OUTOFMEMORY; 7672 } 7673 } 7674 7675 if (fixup == COMPLEX_FIXUP_P8) 7676 upload_palette(blitter, texture, context); 7677 7678 gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB); 7679 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 7680 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 7681 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)"); 7682 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARBFP_BLIT_PARAM_SIZE, &size.x)); 7683 checkGLcall("glProgramLocalParameter4fvARB"); 7684 if (type.use_color_key) 7685 { 7686 wined3d_format_get_float_color_key(texture->resource.format, color_key, float_color_key); 7687 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 7688 ARBFP_BLIT_PARAM_COLOR_KEY_LOW, &float_color_key[0].r)); 7689 
GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 7690 ARBFP_BLIT_PARAM_COLOR_KEY_HIGH, &float_color_key[1].r)); 7691 checkGLcall("glProgramLocalParameter4fvARB"); 7692 } 7693 7694 return WINED3D_OK; 7695 } 7696 7697 /* Context activation is done by the caller. */ 7698 static void arbfp_blit_unset(const struct wined3d_gl_info *gl_info) 7699 { 7700 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 7701 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 7702 } 7703 7704 static BOOL arbfp_blit_supported(enum wined3d_blit_op blit_op, const struct wined3d_context *context, 7705 const struct wined3d_resource *src_resource, DWORD src_location, 7706 const struct wined3d_resource *dst_resource, DWORD dst_location) 7707 { 7708 const struct wined3d_format *src_format = src_resource->format; 7709 const struct wined3d_format *dst_format = dst_resource->format; 7710 enum complex_fixup src_fixup; 7711 BOOL decompress; 7712 7713 if (!context->gl_info->supported[ARB_FRAGMENT_PROGRAM]) 7714 return FALSE; 7715 7716 if (blit_op == WINED3D_BLIT_OP_RAW_BLIT && dst_format->id == src_format->id) 7717 { 7718 if (dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)) 7719 blit_op = WINED3D_BLIT_OP_DEPTH_BLIT; 7720 else 7721 blit_op = WINED3D_BLIT_OP_COLOR_BLIT; 7722 } 7723 7724 switch (blit_op) 7725 { 7726 case WINED3D_BLIT_OP_COLOR_BLIT_CKEY: 7727 if (!context->d3d_info->shader_color_key) 7728 { 7729 /* The conversion modifies the alpha channel so the color key might no longer match. 
*/ 7730 TRACE("Color keying not supported with converted textures.\n"); 7731 return FALSE; 7732 } 7733 case WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST: 7734 case WINED3D_BLIT_OP_COLOR_BLIT: 7735 break; 7736 7737 default: 7738 TRACE("Unsupported blit_op=%d\n", blit_op); 7739 return FALSE; 7740 } 7741 7742 decompress = src_format && (src_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED) 7743 && !(dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED); 7744 if (!decompress && !(src_resource->access & dst_resource->access & WINED3D_RESOURCE_ACCESS_GPU)) 7745 return FALSE; 7746 7747 src_fixup = get_complex_fixup(src_format->color_fixup); 7748 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d)) 7749 { 7750 TRACE("Checking support for fixup:\n"); 7751 dump_color_fixup_desc(src_format->color_fixup); 7752 } 7753 7754 if (!is_identity_fixup(dst_format->color_fixup) 7755 && (dst_format->id != src_format->id || dst_location != WINED3D_LOCATION_DRAWABLE)) 7756 { 7757 TRACE("Destination fixups are not supported\n"); 7758 return FALSE; 7759 } 7760 7761 if (is_identity_fixup(src_format->color_fixup)) 7762 { 7763 TRACE("[OK]\n"); 7764 return TRUE; 7765 } 7766 7767 /* We only support YUV conversions. 
*/ 7768 if (!is_complex_fixup(src_format->color_fixup)) 7769 { 7770 if (wined3d_settings.offscreen_rendering_mode == ORM_BACKBUFFER) 7771 { 7772 WARN("Claiming fixup support because of ORM_BACKBUFFER.\n"); 7773 return TRUE; 7774 } 7775 7776 TRACE("[FAILED]\n"); 7777 return FALSE; 7778 } 7779 7780 switch(src_fixup) 7781 { 7782 case COMPLEX_FIXUP_YUY2: 7783 case COMPLEX_FIXUP_UYVY: 7784 case COMPLEX_FIXUP_YV12: 7785 case COMPLEX_FIXUP_NV12: 7786 case COMPLEX_FIXUP_P8: 7787 TRACE("[OK]\n"); 7788 return TRUE; 7789 7790 default: 7791 FIXME("Unsupported YUV fixup %#x\n", src_fixup); 7792 TRACE("[FAILED]\n"); 7793 return FALSE; 7794 } 7795 } 7796 7797 static DWORD arbfp_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_blit_op op, 7798 struct wined3d_context *context, struct wined3d_surface *src_surface, DWORD src_location, 7799 const RECT *src_rect, struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect, 7800 const struct wined3d_color_key *color_key, enum wined3d_texture_filter_type filter) 7801 { 7802 unsigned int src_sub_resource_idx = surface_get_sub_resource_idx(src_surface); 7803 struct wined3d_texture *src_texture = src_surface->container; 7804 struct wined3d_texture *dst_texture = dst_surface->container; 7805 struct wined3d_device *device = dst_texture->resource.device; 7806 struct wined3d_arbfp_blitter *arbfp_blitter; 7807 struct wined3d_color_key alpha_test_key; 7808 struct wined3d_blitter *next; 7809 RECT s, d; 7810 7811 if (!arbfp_blit_supported(op, context, &src_texture->resource, src_location, 7812 &dst_texture->resource, dst_location)) 7813 { 7814 if ((next = blitter->next)) 7815 return next->ops->blitter_blit(next, op, context, src_surface, src_location, 7816 src_rect, dst_surface, dst_location, dst_rect, color_key, filter); 7817 } 7818 7819 arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter); 7820 7821 /* Now load the surface */ 7822 if (wined3d_settings.offscreen_rendering_mode != ORM_FBO 
7823 && (surface_get_sub_resource(src_surface)->locations 7824 & (WINED3D_LOCATION_TEXTURE_RGB | WINED3D_LOCATION_DRAWABLE)) 7825 == WINED3D_LOCATION_DRAWABLE 7826 && !wined3d_resource_is_offscreen(&src_texture->resource)) 7827 { 7828 unsigned int src_level = src_sub_resource_idx % src_texture->level_count; 7829 7830 /* Without FBO blits transferring from the drawable to the texture is 7831 * expensive, because we have to flip the data in sysmem. Since we can 7832 * flip in the blitter, we don't actually need that flip anyway. So we 7833 * use the surface's texture as scratch texture, and flip the source 7834 * rectangle instead. */ 7835 surface_load_fb_texture(src_surface, FALSE, context); 7836 7837 s = *src_rect; 7838 s.top = wined3d_texture_get_level_height(src_texture, src_level) - s.top; 7839 s.bottom = wined3d_texture_get_level_height(src_texture, src_level) - s.bottom; 7840 src_rect = &s; 7841 } 7842 else 7843 wined3d_texture_load(src_texture, context, FALSE); 7844 7845 context_apply_blit_state(context, device); 7846 7847 if (dst_location == WINED3D_LOCATION_DRAWABLE) 7848 { 7849 d = *dst_rect; 7850 surface_translate_drawable_coords(dst_surface, context->win_handle, &d); 7851 dst_rect = &d; 7852 } 7853 7854 if (wined3d_settings.offscreen_rendering_mode == ORM_FBO) 7855 { 7856 GLenum buffer; 7857 7858 if (dst_location == WINED3D_LOCATION_DRAWABLE) 7859 { 7860 TRACE("Destination surface %p is onscreen.\n", dst_surface); 7861 buffer = wined3d_texture_get_gl_buffer(dst_texture); 7862 } 7863 else 7864 { 7865 TRACE("Destination surface %p is offscreen.\n", dst_surface); 7866 buffer = GL_COLOR_ATTACHMENT0; 7867 } 7868 context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location); 7869 context_set_draw_buffer(context, buffer); 7870 context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER); 7871 context_invalidate_state(context, STATE_FRAMEBUFFER); 7872 } 7873 7874 if (op == WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST) 7875 { 7876 const struct 
wined3d_format *fmt = src_texture->resource.format; 7877 alpha_test_key.color_space_low_value = 0; 7878 alpha_test_key.color_space_high_value = ~(((1u << fmt->alpha_size) - 1) << fmt->alpha_offset); 7879 color_key = &alpha_test_key; 7880 } 7881 7882 arbfp_blit_set(arbfp_blitter, context, src_texture, src_sub_resource_idx, color_key); 7883 7884 /* Draw a textured quad */ 7885 draw_textured_quad(src_texture, src_sub_resource_idx, context, src_rect, dst_rect, filter); 7886 7887 /* Leave the opengl state valid for blitting */ 7888 arbfp_blit_unset(context->gl_info); 7889 7890 if (wined3d_settings.strict_draw_ordering 7891 || (dst_texture->swapchain && (dst_texture->swapchain->front_buffer == dst_texture))) 7892 context->gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */ 7893 7894 return dst_location; 7895 } 7896 7897 static void arbfp_blitter_clear(struct wined3d_blitter *blitter, struct wined3d_device *device, 7898 unsigned int rt_count, const struct wined3d_fb_state *fb, unsigned int rect_count, const RECT *clear_rects, 7899 const RECT *draw_rect, DWORD flags, const struct wined3d_color *colour, float depth, DWORD stencil) 7900 { 7901 struct wined3d_blitter *next; 7902 7903 if ((next = blitter->next)) 7904 next->ops->blitter_clear(next, device, rt_count, fb, rect_count, 7905 clear_rects, draw_rect, flags, colour, depth, stencil); 7906 } 7907 7908 static const struct wined3d_blitter_ops arbfp_blitter_ops = 7909 { 7910 arbfp_blitter_destroy, 7911 arbfp_blitter_clear, 7912 arbfp_blitter_blit, 7913 }; 7914 7915 void wined3d_arbfp_blitter_create(struct wined3d_blitter **next, const struct wined3d_device *device) 7916 { 7917 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 7918 struct wined3d_arbfp_blitter *blitter; 7919 7920 if (device->shader_backend != &arb_program_shader_backend 7921 && device->shader_backend != &glsl_shader_backend) 7922 return; 7923 7924 if (!gl_info->supported[ARB_FRAGMENT_PROGRAM]) 7925 return; 7926 
7927 if (!gl_info->supported[WINED3D_GL_LEGACY_CONTEXT]) 7928 return; 7929 7930 if (!(blitter = heap_alloc(sizeof(*blitter)))) 7931 { 7932 ERR("Failed to allocate blitter.\n"); 7933 return; 7934 } 7935 7936 TRACE("Created blitter %p.\n", blitter); 7937 7938 blitter->blitter.ops = &arbfp_blitter_ops; 7939 blitter->blitter.next = *next; 7940 wine_rb_init(&blitter->shaders, arbfp_blit_type_compare); 7941 blitter->palette_texture = 0; 7942 *next = &blitter->blitter; 7943 } 7944