1 /* 2 * Pixel and vertex shaders implementation using ARB_vertex_program 3 * and ARB_fragment_program GL extensions. 4 * 5 * Copyright 2002-2003 Jason Edmeades 6 * Copyright 2002-2003 Raphael Junqueira 7 * Copyright 2004 Christian Costa 8 * Copyright 2005 Oliver Stieber 9 * Copyright 2006 Ivan Gyurdiev 10 * Copyright 2006 Jason Green 11 * Copyright 2006 Henri Verbeet 12 * Copyright 2007-2011, 2013-2014 Stefan Dösinger for CodeWeavers 13 * Copyright 2009 Henri Verbeet for CodeWeavers 14 * 15 * This library is free software; you can redistribute it and/or 16 * modify it under the terms of the GNU Lesser General Public 17 * License as published by the Free Software Foundation; either 18 * version 2.1 of the License, or (at your option) any later version. 19 * 20 * This library is distributed in the hope that it will be useful, 21 * but WITHOUT ANY WARRANTY; without even the implied warranty of 22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 * Lesser General Public License for more details. 
*
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "wined3d_private.h"

WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
WINE_DECLARE_DEBUG_CHANNEL(d3d);
WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);

/* Returns TRUE if the shader type is WINED3D_SHADER_TYPE_PIXEL. */
static BOOL shader_is_pshader_version(enum wined3d_shader_type type)
{
    return type == WINED3D_SHADER_TYPE_PIXEL;
}

/* Returns TRUE if the shader type is WINED3D_SHADER_TYPE_VERTEX. */
static BOOL shader_is_vshader_version(enum wined3d_shader_type type)
{
    return type == WINED3D_SHADER_TYPE_VERTEX;
}

/* Line iterator over a NUL-terminated buffer.
 *
 * Returns a pointer to the start of the current line and advances *ptr just
 * past the next "\n". If no newline remains, the rest of the buffer is
 * returned once (*ptr is moved to the terminating NUL), and NULL is returned
 * when the buffer is exhausted. The returned pointer aliases the input; the
 * line is NOT NUL-terminated at the newline. */
static const char *get_line(const char **ptr)
{
    const char *p, *q;

    p = *ptr;
    if (!(q = strstr(p, "\n")))
    {
        /* Last line without a trailing newline, or end of buffer. */
        if (!*p) return NULL;
        *ptr += strlen(p);
        return p;
    }
    *ptr = q + 1;

    return p;
}

/* Symbolic names for the helper values resolved by arb_get_helper_value(). */
enum arb_helper_value
{
    ARB_ZERO,
    ARB_ONE,
    ARB_TWO,
    ARB_0001,
    ARB_EPS,

    ARB_VS_REL_OFFSET
};

/* Maps a helper value to the program text that names it for the given shader
 * type.
 *
 * Shader types other than vertex and pixel log an ERR and yield "bad". A value
 * with no entry for the shader type (ARB_VS_REL_OFFSET in a pixel shader, or
 * an out-of-range value) logs a FIXME and falls back to a literal, or "bad"
 * when no literal exists. All returned strings are static. */
static const char *arb_get_helper_value(enum wined3d_shader_type shader, enum arb_helper_value value)
{
    if (shader != WINED3D_SHADER_TYPE_VERTEX && shader != WINED3D_SHADER_TYPE_PIXEL)
    {
        ERR("Unsupported shader type '%s'.\n", debug_shader_type(shader));
        return "bad";
    }

    if (shader == WINED3D_SHADER_TYPE_PIXEL)
    {
        switch (value)
        {
            case ARB_ZERO: return "ps_helper_const.x";
            case ARB_ONE: return "ps_helper_const.y";
            case ARB_TWO: return "coefmul.x";
            case ARB_0001: return "ps_helper_const.xxxy";
            case ARB_EPS: return "ps_helper_const.z";
            default: break;
        }
    }
    else
    {
        switch (value)
        {
            case ARB_ZERO: return "helper_const.x";
            case ARB_ONE: return "helper_const.y";
            case ARB_TWO: return "helper_const.z";
            case ARB_EPS: return "helper_const.w";
            case ARB_0001: return "helper_const.xxxy";
            case ARB_VS_REL_OFFSET: return "rel_addr_const.y";
        }
    }
    /* Only reached when the per-type switch above had no match. */
    FIXME("Unmanaged %s shader helper constant requested: %u.\n",
            shader == WINED3D_SHADER_TYPE_PIXEL ? "pixel" : "vertex", value);
    switch (value)
    {
        case ARB_ZERO: return "0.0";
        case ARB_ONE: return "1.0";
        case ARB_TWO: return "2.0";
        case ARB_0001: return "{0.0, 0.0, 0.0, 1.0}";
        case ARB_EPS: return "1e-8";
        default: return "bad";
    }
}

/* Returns TRUE when context->lowest_disabled_stage is below 7.
 * NOTE(review): presumably this means a texture stage is still free for
 * clip plane emulation - confirm against the callers. */
static inline BOOL ffp_clip_emul(const struct wined3d_context *context)
{
    return context->lowest_disabled_stage < 7;
}

/* ARB_program_shader private data */

/* One entry of the control flow stack kept in shader_arb_ctx_priv.control_frames. */
struct control_frame
{
    struct list entry;
    enum
    {
        IF,
        IFC,
        LOOP,
        REP
    } type;
    BOOL muting;
    BOOL outer_loop;
    union
    {
        unsigned int loop;
        unsigned int ifc;
    } no;
    struct wined3d_shader_loop_control loop_control;
    BOOL had_else;
};

struct arb_ps_np2fixup_info
{
    struct ps_np2fixup_info super;
    /* For ARB we need an offset value:
     * With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) are stored in a
     * consecutive way (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone"
     * array we need an offset to the index inside the program local parameter array. */
    UINT offset;
};

/* Pixel shader compile arguments; one compiled variant (arb_ps_compiled_shader)
 * stores a copy of these. */
struct arb_ps_compile_args
{
    struct ps_compile_args super;
    WORD bools;
    WORD clip;  /* only a boolean, use a WORD for alignment */
    unsigned char loop_ctrl[WINED3D_MAX_CONSTS_I][3];
};

/* Associates a texture unit with the program parameter index that receives
 * its per-stage data (see bumpenvmatconst / luminanceconst below). */
struct stb_const_desc
{
    unsigned char texunit;
    UINT const_num;
};

/* One compiled pixel program variant and the parameter slots it expects to be
 * filled at draw time. */
struct arb_ps_compiled_shader
{
    struct arb_ps_compile_args args;
    struct arb_ps_np2fixup_info np2fixup_info;
    struct stb_const_desc bumpenvmatconst[MAX_TEXTURES];
    struct stb_const_desc luminanceconst[MAX_TEXTURES];
    UINT int_consts[WINED3D_MAX_CONSTS_I];
    GLuint prgId;
    UINT ycorrection;
    unsigned char numbumpenvmatconsts;
    char num_int_consts;
};

/* Vertex shader compile arguments. The unions pair small fields with a DWORD
 * of the same storage (boolclip_compare, samplers_compare).
 * NOTE(review): presumably so they can be compared in one go - confirm. */
struct arb_vs_compile_args
{
    struct vs_compile_args super;
    union
    {
        struct
        {
            WORD bools;
            unsigned char clip_texcoord;
            unsigned char clipplane_mask;
        } boolclip;
        DWORD boolclip_compare;
    } clip;
    DWORD ps_signature;
    union
    {
        unsigned char samplers[4];
        DWORD samplers_compare;
    } vertex;
    unsigned char loop_ctrl[WINED3D_MAX_CONSTS_I][3];
};

/* One compiled vertex program variant and the parameter slots it expects to
 * be filled at draw time. */
struct arb_vs_compiled_shader
{
    struct arb_vs_compile_args args;
    GLuint prgId;
    UINT int_consts[WINED3D_MAX_CONSTS_I];
    char num_int_consts;
    char need_color_unclamp;
    UINT pos_fixup;
};

/* List node holding a copy of a shader instruction (see shader_arb_ctx_priv.record). */
struct recorded_instruction
{
    struct wined3d_shader_instruction ins;
    struct list entry;
};

/* Per-compilation backend state, reachable through ins->ctx->backend_data. */
struct shader_arb_ctx_priv
{
    /* Source string last loaded into A0.x by shader_arb_request_a0(). */
    char addr_reg[20];
    enum
    {
        /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */
        ARB,
        /* GL_NV_vertex_program2_option or GL_NV_fragment_program_option */
        NV2,
        /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */
        NV3
    } target_version;

    const struct arb_vs_compile_args *cur_vs_args;
    const struct arb_ps_compile_args *cur_ps_args;
    const struct arb_ps_compiled_shader *compiled_fprog;
    const struct arb_vs_compiled_shader *compiled_vprog;
    struct arb_ps_np2fixup_info *cur_np2fixup_info;
    struct list control_frames;
    struct list record;
    BOOL recording;
    BOOL muted;
    unsigned int num_loops, loop_depth, num_ifcs;
    int aL;
    BOOL ps_post_process;

    unsigned int vs_clipplanes;
    BOOL footer_written;
    BOOL in_main_func;

    /* For 3.0 vertex shaders */
    const char *vs_output[MAX_REG_OUTPUT];
    /* For 2.x and earlier vertex shaders */
    const char *texcrd_output[8], *color_output[2], *fog_output;

    /* 3.0 pshader input for compatibility with fixed function */
    const char *ps_input[MAX_REG_INPUT];
};

/* A shader signature with an index, stored in a red-black tree
 * (see shader_arb_priv.signature_tree). */
struct ps_signature
{
    struct wined3d_shader_signature sig;
    DWORD idx;
    struct wine_rb_entry entry;
};

/* Backend data attached to a pixel shader: its compiled variants. */
struct arb_pshader_private {
    struct arb_ps_compiled_shader *gl_shaders;
    UINT num_gl_shaders, shader_array_size;
    DWORD input_signature_idx;
    DWORD clipplane_emulation;
    BOOL clamp_consts;
};

/* Backend data attached to a vertex shader: its compiled variants. */
struct arb_vshader_private {
    struct arb_vs_compiled_shader *gl_shaders;
    UINT num_gl_shaders, shader_array_size;
    UINT rel_offset;
};

/* Device-wide state of the ARB shader backend (device->shader_priv). */
struct shader_arb_priv
{
    GLuint current_vprogram_id;
    GLuint current_fprogram_id;
    const struct arb_ps_compiled_shader *compiled_fprog;
    const struct arb_vs_compiled_shader *compiled_vprog;
    BOOL use_arbfp_fixed_func;
    struct wine_rb_tree fragment_shaders;
    BOOL last_ps_const_clamped;
    BOOL last_vs_color_unclamp;

    struct wine_rb_tree signature_tree;
    DWORD ps_sig_number;

    /* Dirty tracking for float constants: one flag per constant, plus an
     * exclusive upper bound on the dirty indices. */
    unsigned int highest_dirty_ps_const, highest_dirty_vs_const;
    char vshader_const_dirty[WINED3D_MAX_VS_CONSTS_F];
    char pshader_const_dirty[WINED3D_MAX_PS_CONSTS_F];
    /* Context the dirty flags refer to; a different context forces a full reload. */
    const struct wined3d_context *last_context;

    const struct wined3d_vertex_pipe_ops *vertex_pipe;
    const struct fragment_pipeline *fragment_pipe;
    BOOL ffp_proj_control;
};

/* Context activation for
state handlers is done by the caller. */ 307 308 static BOOL need_rel_addr_const(const struct arb_vshader_private *shader_data, 309 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 310 { 311 if (shader_data->rel_offset) return TRUE; 312 if (!reg_maps->usesmova) return FALSE; 313 return !gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]; 314 } 315 316 /* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */ 317 static inline BOOL use_nv_clip(const struct wined3d_gl_info *gl_info) 318 { 319 return gl_info->supported[NV_VERTEX_PROGRAM2_OPTION] 320 && !(gl_info->quirks & WINED3D_QUIRK_NV_CLIP_BROKEN); 321 } 322 323 static BOOL need_helper_const(const struct arb_vshader_private *shader_data, 324 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 325 { 326 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) return TRUE; 327 if (!gl_info->supported[NV_VERTEX_PROGRAM]) return TRUE; /* Need to init colors. */ 328 if (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) return TRUE; /* Load the immval offset. */ 329 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) return TRUE; /* Have to init texcoords. */ 330 if (!use_nv_clip(gl_info)) return TRUE; /* Init the clip texcoord */ 331 if (reg_maps->usesnrm) return TRUE; /* 0.0 */ 332 if (reg_maps->usespow) return TRUE; /* EPS, 0.0 and 1.0 */ 333 if (reg_maps->fog) return TRUE; /* Clamping fog coord, 0.0 and 1.0 */ 334 return FALSE; 335 } 336 337 static unsigned int reserved_vs_const(const struct arb_vshader_private *shader_data, 338 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 339 { 340 unsigned int ret = 1; 341 /* We use one PARAM for the pos fixup, and in some cases one to load 342 * some immediate values into the shader. 
*/ 343 if (need_helper_const(shader_data, reg_maps, gl_info)) ++ret; 344 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) ++ret; 345 return ret; 346 } 347 348 /* Loads floating point constants into the currently set ARB_vertex/fragment_program. 349 * When constant_list == NULL, it will load all the constants. 350 * 351 * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders) 352 * or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders) 353 */ 354 /* Context activation is done by the caller. */ 355 static unsigned int shader_arb_load_constants_f(const struct wined3d_shader *shader, 356 const struct wined3d_gl_info *gl_info, GLuint target_type, unsigned int max_constants, 357 const struct wined3d_vec4 *constants, char *dirty_consts) 358 { 359 struct wined3d_shader_lconst *lconst; 360 unsigned int ret, i, j; 361 362 if (TRACE_ON(d3d_constants)) 363 { 364 for (i = 0; i < max_constants; ++i) 365 { 366 if (!dirty_consts[i]) 367 continue; 368 TRACE_(d3d_constants)("Loading constant %u: %s.\n", i, debug_vec4(&constants[i])); 369 } 370 } 371 372 i = 0; 373 374 /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */ 375 if (target_type == GL_FRAGMENT_PROGRAM_ARB && shader->reg_maps.shader_version.major == 1) 376 { 377 float lcl_const[4]; 378 /* ps 1.x supports only 8 constants, clamp only those. 
When switching between 1.x and higher 379 * shaders, the first 8 constants are marked dirty for reload 380 */ 381 for (; i < min(8, max_constants); ++i) 382 { 383 if (!dirty_consts[i]) 384 continue; 385 dirty_consts[i] = 0; 386 387 if (constants[i].x > 1.0f) 388 lcl_const[0] = 1.0f; 389 else if (constants[i].x < -1.0f) 390 lcl_const[0] = -1.0f; 391 else 392 lcl_const[0] = constants[i].x; 393 394 if (constants[i].y > 1.0f) 395 lcl_const[1] = 1.0f; 396 else if (constants[i].y < -1.0f) 397 lcl_const[1] = -1.0f; 398 else 399 lcl_const[1] = constants[i].y; 400 401 if (constants[i].z > 1.0f) 402 lcl_const[2] = 1.0f; 403 else if (constants[i].z < -1.0f) 404 lcl_const[2] = -1.0f; 405 else 406 lcl_const[2] = constants[i].z; 407 408 if (constants[i].w > 1.0f) 409 lcl_const[3] = 1.0f; 410 else if (constants[i].w < -1.0f) 411 lcl_const[3] = -1.0f; 412 else 413 lcl_const[3] = constants[i].w; 414 415 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const)); 416 } 417 418 /* If further constants are dirty, reload them without clamping. 419 * 420 * The alternative is not to touch them, but then we cannot reset the dirty constant count 421 * to zero. 
That's bad for apps that only use PS 1.x shaders, because in that case the code 422 * above would always re-check the first 8 constants since max_constant remains at the init 423 * value 424 */ 425 } 426 427 if (gl_info->supported[EXT_GPU_PROGRAM_PARAMETERS]) 428 { 429 /* TODO: Benchmark if we're better of with finding the dirty constants ourselves, 430 * or just reloading *all* constants at once 431 * 432 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4))); 433 */ 434 for (; i < max_constants; ++i) 435 { 436 if (!dirty_consts[i]) 437 continue; 438 439 /* Find the next block of dirty constants */ 440 dirty_consts[i] = 0; 441 j = i; 442 for (++i; (i < max_constants) && dirty_consts[i]; ++i) 443 { 444 dirty_consts[i] = 0; 445 } 446 447 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, &constants[j].x)); 448 } 449 } 450 else 451 { 452 for (; i < max_constants; ++i) 453 { 454 if (dirty_consts[i]) 455 { 456 dirty_consts[i] = 0; 457 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, &constants[i].x)); 458 } 459 } 460 } 461 checkGLcall("glProgramEnvParameter4fvARB()"); 462 463 /* Load immediate constants */ 464 if (shader->load_local_constsF) 465 { 466 if (TRACE_ON(d3d_shader)) 467 { 468 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 469 { 470 GLfloat* values = (GLfloat*)lconst->value; 471 TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx, 472 values[0], values[1], values[2], values[3]); 473 } 474 } 475 /* Immediate constants are clamped for 1.X shaders at loading times */ 476 ret = 0; 477 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 478 { 479 dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */ 480 ret = max(ret, lconst->idx + 1); 481 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value)); 482 } 483 
checkGLcall("glProgramEnvParameter4fvARB()"); 484 return ret; /* The loaded immediate constants need reloading for the next shader */ 485 } else { 486 return 0; /* No constants are dirty now */ 487 } 488 } 489 490 /* Loads the texture dimensions for NP2 fixup into the currently set 491 * ARB_[vertex/fragment]_programs. */ 492 static void shader_arb_load_np2fixup_constants(const struct arb_ps_np2fixup_info *fixup, 493 const struct wined3d_gl_info *gl_info, const struct wined3d_state *state) 494 { 495 GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS]; 496 WORD active = fixup->super.active; 497 UINT i; 498 499 if (!active) 500 return; 501 502 for (i = 0; active; active >>= 1, ++i) 503 { 504 const struct wined3d_texture *tex = state->textures[i]; 505 unsigned char idx = fixup->super.idx[i]; 506 GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4]; 507 508 if (!(active & 1)) 509 continue; 510 511 if (!tex) 512 { 513 ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n"); 514 continue; 515 } 516 517 if (idx % 2) 518 { 519 tex_dim[2] = tex->pow2_matrix[0]; 520 tex_dim[3] = tex->pow2_matrix[5]; 521 } 522 else 523 { 524 tex_dim[0] = tex->pow2_matrix[0]; 525 tex_dim[1] = tex->pow2_matrix[5]; 526 } 527 } 528 529 for (i = 0; i < fixup->super.num_consts; ++i) 530 { 531 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 532 fixup->offset + i, &np2fixup_constants[i * 4])); 533 } 534 } 535 536 /* Context activation is done by the caller. 
*/ 537 static void shader_arb_ps_local_constants(const struct arb_ps_compiled_shader *gl_shader, 538 const struct wined3d_context *context, const struct wined3d_state *state, UINT rt_height) 539 { 540 const struct wined3d_gl_info *gl_info = context->gl_info; 541 unsigned char i; 542 543 for(i = 0; i < gl_shader->numbumpenvmatconsts; i++) 544 { 545 int texunit = gl_shader->bumpenvmatconst[i].texunit; 546 547 /* The state manager takes care that this function is always called if the bump env matrix changes */ 548 const float *data = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_MAT00]; 549 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 550 gl_shader->bumpenvmatconst[i].const_num, data)); 551 552 if (gl_shader->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED) 553 { 554 /* WINED3D_TSS_BUMPENVLSCALE and WINED3D_TSS_BUMPENVLOFFSET are next to each other. 555 * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we 556 * don't care about them. The pointers are valid for sure because the stateblock is bigger. 557 * (they're WINED3D_TSS_TEXTURETRANSFORMFLAGS and WINED3D_TSS_ADDRESSW, so most likely 0 or NaN 558 */ 559 const float *scale = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_LSCALE]; 560 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 561 gl_shader->luminanceconst[i].const_num, scale)); 562 } 563 } 564 checkGLcall("Load bumpmap consts"); 565 566 if(gl_shader->ycorrection != WINED3D_CONST_NUM_UNUSED) 567 { 568 /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen). 569 * ycorrection.y: -1.0(onscreen), 1.0(offscreen) 570 * ycorrection.z: 1.0 571 * ycorrection.w: 0.0 572 */ 573 float val[4]; 574 val[0] = context->render_offscreen ? 0.0f : (float) rt_height; 575 val[1] = context->render_offscreen ? 
1.0f : -1.0f; 576 val[2] = 1.0f; 577 val[3] = 0.0f; 578 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->ycorrection, val)); 579 checkGLcall("y correction loading"); 580 } 581 582 if (!gl_shader->num_int_consts) return; 583 584 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 585 { 586 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 587 { 588 float val[4]; 589 val[0] = (float)state->ps_consts_i[i].x; 590 val[1] = (float)state->ps_consts_i[i].y; 591 val[2] = (float)state->ps_consts_i[i].z; 592 val[3] = -1.0f; 593 594 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->int_consts[i], val)); 595 } 596 } 597 checkGLcall("Load ps int consts"); 598 } 599 600 /* Context activation is done by the caller. */ 601 static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *gl_shader, 602 const struct wined3d_context *context, const struct wined3d_state *state) 603 { 604 const struct wined3d_gl_info *gl_info = context->gl_info; 605 float position_fixup[4]; 606 unsigned char i; 607 608 /* Upload the position fixup */ 609 shader_get_position_fixup(context, state, position_fixup); 610 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, position_fixup)); 611 612 if (!gl_shader->num_int_consts) return; 613 614 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 615 { 616 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 617 { 618 float val[4]; 619 val[0] = (float)state->vs_consts_i[i].x; 620 val[1] = (float)state->vs_consts_i[i].y; 621 val[2] = (float)state->vs_consts_i[i].z; 622 val[3] = -1.0f; 623 624 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->int_consts[i], val)); 625 } 626 } 627 checkGLcall("Load vs int consts"); 628 } 629 630 static void shader_arb_select(void *shader_priv, struct wined3d_context *context, 631 const struct wined3d_state *state); 632 633 /** 634 * Loads the app-supplied constants into the currently set 
ARB_[vertex/fragment]_programs. 635 * 636 * We only support float constants in ARB at the moment, so don't 637 * worry about the Integers or Booleans 638 */ 639 /* Context activation is done by the caller (state handler). */ 640 static void shader_arb_load_constants_internal(struct shader_arb_priv *priv, 641 struct wined3d_context *context, const struct wined3d_state *state, 642 BOOL usePixelShader, BOOL useVertexShader, BOOL from_shader_select) 643 { 644 const struct wined3d_d3d_info *d3d_info = context->d3d_info; 645 const struct wined3d_gl_info *gl_info = context->gl_info; 646 647 if (!from_shader_select) 648 { 649 const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX]; 650 const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 651 652 if (vshader 653 && (vshader->reg_maps.boolean_constants 654 || (!gl_info->supported[NV_VERTEX_PROGRAM2_OPTION] 655 && (vshader->reg_maps.integer_constants & ~vshader->reg_maps.local_int_consts)))) 656 { 657 TRACE("bool/integer vertex shader constants potentially modified, forcing shader reselection.\n"); 658 shader_arb_select(priv, context, state); 659 } 660 else if (pshader 661 && (pshader->reg_maps.boolean_constants 662 || (!gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION] 663 && (pshader->reg_maps.integer_constants & ~pshader->reg_maps.local_int_consts)))) 664 { 665 TRACE("bool/integer pixel shader constants potentially modified, forcing shader reselection.\n"); 666 shader_arb_select(priv, context, state); 667 } 668 } 669 670 if (context != priv->last_context) 671 { 672 memset(priv->vshader_const_dirty, 1, 673 sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count); 674 priv->highest_dirty_vs_const = d3d_info->limits.vs_uniform_count; 675 676 memset(priv->pshader_const_dirty, 1, 677 sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count); 678 priv->highest_dirty_ps_const = d3d_info->limits.ps_uniform_count; 679 680 priv->last_context = context; 
681 } 682 683 if (useVertexShader) 684 { 685 const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX]; 686 const struct arb_vs_compiled_shader *gl_shader = priv->compiled_vprog; 687 688 /* Load DirectX 9 float constants for vertex shader */ 689 priv->highest_dirty_vs_const = shader_arb_load_constants_f(vshader, gl_info, GL_VERTEX_PROGRAM_ARB, 690 priv->highest_dirty_vs_const, state->vs_consts_f, priv->vshader_const_dirty); 691 shader_arb_vs_local_constants(gl_shader, context, state); 692 } 693 694 if (usePixelShader) 695 { 696 const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 697 const struct arb_ps_compiled_shader *gl_shader = priv->compiled_fprog; 698 UINT rt_height = state->fb->render_targets[0]->height; 699 700 /* Load DirectX 9 float constants for pixel shader */ 701 priv->highest_dirty_ps_const = shader_arb_load_constants_f(pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB, 702 priv->highest_dirty_ps_const, state->ps_consts_f, priv->pshader_const_dirty); 703 shader_arb_ps_local_constants(gl_shader, context, state, rt_height); 704 705 if (context->constant_update_mask & WINED3D_SHADER_CONST_PS_NP2_FIXUP) 706 shader_arb_load_np2fixup_constants(&gl_shader->np2fixup_info, gl_info, state); 707 } 708 } 709 710 static void shader_arb_load_constants(void *shader_priv, struct wined3d_context *context, 711 const struct wined3d_state *state) 712 { 713 BOOL vs = use_vs(state); 714 BOOL ps = use_ps(state); 715 716 shader_arb_load_constants_internal(shader_priv, context, state, ps, vs, FALSE); 717 } 718 719 static void shader_arb_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count) 720 { 721 struct wined3d_context *context = context_get_current(); 722 struct shader_arb_priv *priv = device->shader_priv; 723 724 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 725 * context. 
On a context switch the old context will be fully dirtified */ 726 if (!context || context->device != device) 727 return; 728 729 memset(priv->vshader_const_dirty + start, 1, sizeof(*priv->vshader_const_dirty) * count); 730 priv->highest_dirty_vs_const = max(priv->highest_dirty_vs_const, start + count); 731 } 732 733 static void shader_arb_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) 734 { 735 struct wined3d_context *context = context_get_current(); 736 struct shader_arb_priv *priv = device->shader_priv; 737 738 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 739 * context. On a context switch the old context will be fully dirtified */ 740 if (!context || context->device != device) 741 return; 742 743 memset(priv->pshader_const_dirty + start, 1, sizeof(*priv->pshader_const_dirty) * count); 744 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, start + count); 745 } 746 747 static void shader_arb_append_imm_vec4(struct wined3d_string_buffer *buffer, const float *values) 748 { 749 char str[4][17]; 750 751 wined3d_ftoa(values[0], str[0]); 752 wined3d_ftoa(values[1], str[1]); 753 wined3d_ftoa(values[2], str[2]); 754 wined3d_ftoa(values[3], str[3]); 755 shader_addline(buffer, "{%s, %s, %s, %s}", str[0], str[1], str[2], str[3]); 756 } 757 758 /* Generate the variable & register declarations for the ARB_vertex_program output target */ 759 static void shader_generate_arb_declarations(const struct wined3d_shader *shader, 760 const struct wined3d_shader_reg_maps *reg_maps, struct wined3d_string_buffer *buffer, 761 const struct wined3d_gl_info *gl_info, DWORD *num_clipplanes, 762 const struct shader_arb_ctx_priv *ctx) 763 { 764 DWORD i; 765 char pshader = shader_is_pshader_version(reg_maps->shader_version.type); 766 const struct wined3d_shader_lconst *lconst; 767 unsigned max_constantsF; 768 DWORD map; 769 770 /* In pixel shaders, all private constants are program local, we 
don't need anything 771 * from program.env. Thus we can advertise the full set of constants in pixel shaders. 772 * If we need a private constant the GL implementation will squeeze it in somewhere 773 * 774 * With vertex shaders we need the posFixup and on some GL implementations 4 helper 775 * immediate values. The posFixup is loaded using program.env for now, so always 776 * subtract one from the number of constants. If the shader uses indirect addressing, 777 * account for the helper const too because we have to declare all available d3d constants 778 * and don't know which are actually used. 779 */ 780 if (pshader) 781 { 782 max_constantsF = gl_info->limits.arb_ps_native_constants; 783 /* 24 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. */ 784 if (max_constantsF < 24) 785 max_constantsF = gl_info->limits.arb_ps_float_constants; 786 } 787 else 788 { 789 const struct arb_vshader_private *shader_data = shader->backend_data; 790 max_constantsF = gl_info->limits.arb_vs_native_constants; 791 /* 96 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. 792 * Also prevents max_constantsF from becoming less than 0 and 793 * wrapping . 
*/ 794 if (max_constantsF < 96) 795 max_constantsF = gl_info->limits.arb_vs_float_constants; 796 797 if (reg_maps->usesrelconstF) 798 { 799 DWORD highest_constf = 0, clip_limit; 800 801 max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info); 802 max_constantsF -= wined3d_popcount(reg_maps->integer_constants); 803 max_constantsF -= gl_info->reserved_arb_constants; 804 805 for (i = 0; i < shader->limits->constant_float; ++i) 806 { 807 DWORD idx = i >> 5; 808 DWORD shift = i & 0x1f; 809 if (reg_maps->constf[idx] & (1u << shift)) 810 highest_constf = i; 811 } 812 813 if(use_nv_clip(gl_info) && ctx->target_version >= NV2) 814 { 815 if(ctx->cur_vs_args->super.clip_enabled) 816 clip_limit = gl_info->limits.user_clip_distances; 817 else 818 clip_limit = 0; 819 } 820 else 821 { 822 unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask; 823 clip_limit = min(wined3d_popcount(mask), 4); 824 } 825 *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1); 826 max_constantsF -= *num_clipplanes; 827 if(*num_clipplanes < clip_limit) 828 { 829 WARN("Only %u clip planes out of %u enabled.\n", *num_clipplanes, 830 gl_info->limits.user_clip_distances); 831 } 832 } 833 else 834 { 835 if (ctx->target_version >= NV2) 836 *num_clipplanes = gl_info->limits.user_clip_distances; 837 else 838 *num_clipplanes = min(gl_info->limits.user_clip_distances, 4); 839 } 840 } 841 842 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 843 { 844 if (map & 1) shader_addline(buffer, "TEMP R%u;\n", i); 845 } 846 847 for (i = 0, map = reg_maps->address; map; map >>= 1, ++i) 848 { 849 if (map & 1) shader_addline(buffer, "ADDRESS A%u;\n", i); 850 } 851 852 if (pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3) 853 { 854 for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i) 855 { 856 if (map & 1) shader_addline(buffer, "TEMP T%u;\n", i); 857 } 858 } 859 860 if (!shader->load_local_constsF) 861 { 862 
LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 863 { 864 const float *value; 865 value = (const float *)lconst->value; 866 shader_addline(buffer, "PARAM C%u = ", lconst->idx); 867 shader_arb_append_imm_vec4(buffer, value); 868 shader_addline(buffer, ";\n"); 869 } 870 } 871 872 /* After subtracting privately used constants from the hardware limit(they are loaded as 873 * local constants), make sure the shader doesn't violate the env constant limit 874 */ 875 if (pshader) 876 { 877 max_constantsF = min(max_constantsF, gl_info->limits.arb_ps_float_constants); 878 } 879 else 880 { 881 max_constantsF = min(max_constantsF, gl_info->limits.arb_vs_float_constants); 882 } 883 884 /* Avoid declaring more constants than needed */ 885 max_constantsF = min(max_constantsF, shader->limits->constant_float); 886 887 /* we use the array-based constants array if the local constants are marked for loading, 888 * because then we use indirect addressing, or when the local constant list is empty, 889 * because then we don't know if we're using indirect addressing or not. 
If we're hardcoding 890 * local constants do not declare the loaded constants as an array because ARB compilers usually 891 * do not optimize unused constants away 892 */ 893 if (reg_maps->usesrelconstF) 894 { 895 /* Need to PARAM the environment parameters (constants) so we can use relative addressing */ 896 shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n", 897 max_constantsF, max_constantsF - 1); 898 } 899 else 900 { 901 for (i = 0; i < max_constantsF; ++i) 902 { 903 if (!shader_constant_is_local(shader, i) && wined3d_extract_bits(reg_maps->constf, i, 1)) 904 { 905 shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i); 906 } 907 } 908 } 909 } 910 911 static const char * const shift_tab[] = { 912 "dummy", /* 0 (none) */ 913 "coefmul.x", /* 1 (x2) */ 914 "coefmul.y", /* 2 (x4) */ 915 "coefmul.z", /* 3 (x8) */ 916 "coefmul.w", /* 4 (x16) */ 917 "dummy", /* 5 (x32) */ 918 "dummy", /* 6 (x64) */ 919 "dummy", /* 7 (x128) */ 920 "dummy", /* 8 (d256) */ 921 "dummy", /* 9 (d128) */ 922 "dummy", /* 10 (d64) */ 923 "dummy", /* 11 (d32) */ 924 "coefdiv.w", /* 12 (d16) */ 925 "coefdiv.z", /* 13 (d8) */ 926 "coefdiv.y", /* 14 (d4) */ 927 "coefdiv.x" /* 15 (d2) */ 928 }; 929 930 static void shader_arb_get_write_mask(const struct wined3d_shader_instruction *ins, 931 const struct wined3d_shader_dst_param *dst, char *write_mask) 932 { 933 char *ptr = write_mask; 934 935 if (dst->write_mask != WINED3DSP_WRITEMASK_ALL) 936 { 937 *ptr++ = '.'; 938 if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x'; 939 if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y'; 940 if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z'; 941 if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w'; 942 } 943 944 *ptr = '\0'; 945 } 946 947 static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str) 948 { 949 /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra", 950 * but addressed as "rgba". 
To fix this we need to swap the register's x 951 * and z components. */ 952 const char *swizzle_chars = fixup ? "zyxw" : "xyzw"; 953 char *ptr = swizzle_str; 954 955 /* swizzle bits fields: wwzzyyxx */ 956 DWORD swizzle = param->swizzle; 957 DWORD swizzle_x = swizzle & 0x03; 958 DWORD swizzle_y = (swizzle >> 2) & 0x03; 959 DWORD swizzle_z = (swizzle >> 4) & 0x03; 960 DWORD swizzle_w = (swizzle >> 6) & 0x03; 961 962 /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to 963 * generate a swizzle string. Unless we need to our own swizzling. */ 964 if (swizzle != WINED3DSP_NOSWIZZLE || fixup) 965 { 966 *ptr++ = '.'; 967 if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) { 968 *ptr++ = swizzle_chars[swizzle_x]; 969 } else { 970 *ptr++ = swizzle_chars[swizzle_x]; 971 *ptr++ = swizzle_chars[swizzle_y]; 972 *ptr++ = swizzle_chars[swizzle_z]; 973 *ptr++ = swizzle_chars[swizzle_w]; 974 } 975 } 976 977 *ptr = '\0'; 978 } 979 980 static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src) 981 { 982 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 983 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 984 985 if (!strcmp(priv->addr_reg, src)) return; 986 987 strcpy(priv->addr_reg, src); 988 shader_addline(buffer, "ARL A0.x, %s;\n", src); 989 } 990 991 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 992 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr); 993 994 static void shader_arb_get_register_name(const struct wined3d_shader_instruction *ins, 995 const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color) 996 { 997 /* oPos, oFog and oPts in D3D */ 998 static const char * const rastout_reg_names[] = {"TMP_OUT", "TMP_FOGCOORD", "result.pointsize"}; 999 const struct wined3d_shader *shader = ins->ctx->shader; 1000 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 1001 BOOL 
pshader = shader_is_pshader_version(reg_maps->shader_version.type); 1002 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1003 1004 *is_color = FALSE; 1005 1006 switch (reg->type) 1007 { 1008 case WINED3DSPR_TEMP: 1009 sprintf(register_name, "R%u", reg->idx[0].offset); 1010 break; 1011 1012 case WINED3DSPR_INPUT: 1013 if (pshader) 1014 { 1015 if (reg_maps->shader_version.major < 3) 1016 { 1017 if (!reg->idx[0].offset) 1018 strcpy(register_name, "fragment.color.primary"); 1019 else 1020 strcpy(register_name, "fragment.color.secondary"); 1021 } 1022 else 1023 { 1024 if (reg->idx[0].rel_addr) 1025 { 1026 char rel_reg[50]; 1027 shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg); 1028 1029 if (!strcmp(rel_reg, "**aL_emul**")) 1030 { 1031 DWORD idx = ctx->aL + reg->idx[0].offset; 1032 if(idx < MAX_REG_INPUT) 1033 { 1034 strcpy(register_name, ctx->ps_input[idx]); 1035 } 1036 else 1037 { 1038 ERR("Pixel shader input register out of bounds: %u\n", idx); 1039 sprintf(register_name, "out_of_bounds_%u", idx); 1040 } 1041 } 1042 else if (reg_maps->input_registers & 0x0300) 1043 { 1044 /* There are two ways basically: 1045 * 1046 * 1) Use the unrolling code that is used for loop emulation and unroll the loop. 1047 * That means trouble if the loop also contains a breakc or if the control values 1048 * aren't local constants. 1049 * 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the 1050 * source dynamically. The trouble is that we cannot simply read aL.y because it 1051 * is an ADDRESS register. 
We could however push it, load .zw with a value and use 1052 * ADAC to load the condition code register and pop it again afterwards 1053 */ 1054 FIXME("Relative input register addressing with more than 8 registers\n"); 1055 1056 /* This is better than nothing for now */ 1057 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset); 1058 } 1059 else if(ctx->cur_ps_args->super.vp_mode != vertexshader) 1060 { 1061 /* This is problematic because we'd have to consult the ctx->ps_input strings 1062 * for where to find the varying. Some may be "0.0", others can be texcoords or 1063 * colors. This needs either a pipeline replacement to make the vertex shader feed 1064 * proper varyings, or loop unrolling 1065 * 1066 * For now use the texcoords and hope for the best 1067 */ 1068 FIXME("Non-vertex shader varying input with indirect addressing\n"); 1069 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset); 1070 } 1071 else 1072 { 1073 /* D3D supports indirect addressing only with aL in loop registers. The loop instruction 1074 * pulls GL_NV_fragment_program2 in 1075 */ 1076 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset); 1077 } 1078 } 1079 else 1080 { 1081 if (reg->idx[0].offset < MAX_REG_INPUT) 1082 { 1083 strcpy(register_name, ctx->ps_input[reg->idx[0].offset]); 1084 } 1085 else 1086 { 1087 ERR("Pixel shader input register out of bounds: %u\n", reg->idx[0].offset); 1088 sprintf(register_name, "out_of_bounds_%u", reg->idx[0].offset); 1089 } 1090 } 1091 } 1092 } 1093 else 1094 { 1095 if (ctx->cur_vs_args->super.swizzle_map & (1u << reg->idx[0].offset)) 1096 *is_color = TRUE; 1097 sprintf(register_name, "vertex.attrib[%u]", reg->idx[0].offset); 1098 } 1099 break; 1100 1101 case WINED3DSPR_CONST: 1102 if (!pshader && reg->idx[0].rel_addr) 1103 { 1104 const struct arb_vshader_private *shader_data = shader->backend_data; 1105 UINT rel_offset = ctx->target_version == ARB ? 
shader_data->rel_offset : 0; 1106 BOOL aL = FALSE; 1107 char rel_reg[50]; 1108 if (reg_maps->shader_version.major < 2) 1109 { 1110 sprintf(rel_reg, "A0.x"); 1111 } 1112 else 1113 { 1114 shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg); 1115 if (ctx->target_version == ARB) 1116 { 1117 if (!strcmp(rel_reg, "**aL_emul**")) 1118 { 1119 aL = TRUE; 1120 } else { 1121 shader_arb_request_a0(ins, rel_reg); 1122 sprintf(rel_reg, "A0.x"); 1123 } 1124 } 1125 } 1126 if (aL) 1127 sprintf(register_name, "C[%u]", ctx->aL + reg->idx[0].offset); 1128 else if (reg->idx[0].offset >= rel_offset) 1129 sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx[0].offset - rel_offset); 1130 else 1131 sprintf(register_name, "C[%s - %u]", rel_reg, rel_offset - reg->idx[0].offset); 1132 } 1133 else 1134 { 1135 if (reg_maps->usesrelconstF) 1136 sprintf(register_name, "C[%u]", reg->idx[0].offset); 1137 else 1138 sprintf(register_name, "C%u", reg->idx[0].offset); 1139 } 1140 break; 1141 1142 case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */ 1143 if (pshader) 1144 { 1145 if (reg_maps->shader_version.major == 1 1146 && reg_maps->shader_version.minor <= 3) 1147 /* In ps <= 1.3, Tx is a temporary register as destination 1148 * to all instructions, and as source to most instructions. 1149 * For some instructions it is the texcoord input. Those 1150 * instructions know about the special use. */ 1151 sprintf(register_name, "T%u", reg->idx[0].offset); 1152 else 1153 /* In ps 1.4 and 2.x Tx is always a (read-only) varying. 
*/ 1154 sprintf(register_name, "fragment.texcoord[%u]", reg->idx[0].offset); 1155 } 1156 else 1157 { 1158 if (reg_maps->shader_version.major == 1 || ctx->target_version >= NV2) 1159 sprintf(register_name, "A%u", reg->idx[0].offset); 1160 else 1161 sprintf(register_name, "A%u_SHADOW", reg->idx[0].offset); 1162 } 1163 break; 1164 1165 case WINED3DSPR_COLOROUT: 1166 if (ctx->ps_post_process && !reg->idx[0].offset) 1167 { 1168 strcpy(register_name, "TMP_COLOR"); 1169 } 1170 else 1171 { 1172 if (ctx->cur_ps_args->super.srgb_correction) 1173 FIXME("sRGB correction on higher render targets.\n"); 1174 if (reg_maps->rt_mask > 1) 1175 sprintf(register_name, "result.color[%u]", reg->idx[0].offset); 1176 else 1177 strcpy(register_name, "result.color"); 1178 } 1179 break; 1180 1181 case WINED3DSPR_RASTOUT: 1182 if (reg->idx[0].offset == 1) 1183 sprintf(register_name, "%s", ctx->fog_output); 1184 else 1185 sprintf(register_name, "%s", rastout_reg_names[reg->idx[0].offset]); 1186 break; 1187 1188 case WINED3DSPR_DEPTHOUT: 1189 strcpy(register_name, "result.depth"); 1190 break; 1191 1192 case WINED3DSPR_ATTROUT: 1193 /* case WINED3DSPR_OUTPUT: */ 1194 if (pshader) 1195 sprintf(register_name, "oD[%u]", reg->idx[0].offset); 1196 else 1197 strcpy(register_name, ctx->color_output[reg->idx[0].offset]); 1198 break; 1199 1200 case WINED3DSPR_TEXCRDOUT: 1201 if (pshader) 1202 sprintf(register_name, "oT[%u]", reg->idx[0].offset); 1203 else if (reg_maps->shader_version.major < 3) 1204 strcpy(register_name, ctx->texcrd_output[reg->idx[0].offset]); 1205 else 1206 strcpy(register_name, ctx->vs_output[reg->idx[0].offset]); 1207 break; 1208 1209 case WINED3DSPR_LOOP: 1210 if(ctx->target_version >= NV2) 1211 { 1212 /* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */ 1213 if(pshader) sprintf(register_name, "A0.x"); 1214 else sprintf(register_name, "aL.y"); 1215 } 1216 else 1217 { 1218 /* Unfortunately this code cannot return the value of ctx->aL here. 
An immediate value 1219 * would be valid, but if aL is used for indexing(its only use), there's likely an offset, 1220 * thus the result would be something like C[15 + 30], which is not valid in the ARB program 1221 * grammar. So return a marker for the emulated aL and intercept it in constant and varying 1222 * indexing 1223 */ 1224 sprintf(register_name, "**aL_emul**"); 1225 } 1226 1227 break; 1228 1229 case WINED3DSPR_CONSTINT: 1230 sprintf(register_name, "I%u", reg->idx[0].offset); 1231 break; 1232 1233 case WINED3DSPR_MISCTYPE: 1234 if (!reg->idx[0].offset) 1235 sprintf(register_name, "vpos"); 1236 else if (reg->idx[0].offset == 1) 1237 sprintf(register_name, "fragment.facing.x"); 1238 else 1239 FIXME("Unknown MISCTYPE register index %u.\n", reg->idx[0].offset); 1240 break; 1241 1242 default: 1243 FIXME("Unhandled register type %#x[%u].\n", reg->type, reg->idx[0].offset); 1244 sprintf(register_name, "unrecognized_register[%u]", reg->idx[0].offset); 1245 break; 1246 } 1247 } 1248 1249 static void shader_arb_get_dst_param(const struct wined3d_shader_instruction *ins, 1250 const struct wined3d_shader_dst_param *wined3d_dst, char *str) 1251 { 1252 char register_name[255]; 1253 char write_mask[6]; 1254 BOOL is_color; 1255 1256 shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color); 1257 strcpy(str, register_name); 1258 1259 shader_arb_get_write_mask(ins, wined3d_dst, write_mask); 1260 strcat(str, write_mask); 1261 } 1262 1263 static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source) 1264 { 1265 switch(channel_source) 1266 { 1267 case CHANNEL_SOURCE_ZERO: return "0"; 1268 case CHANNEL_SOURCE_ONE: return "1"; 1269 case CHANNEL_SOURCE_X: return "x"; 1270 case CHANNEL_SOURCE_Y: return "y"; 1271 case CHANNEL_SOURCE_Z: return "z"; 1272 case CHANNEL_SOURCE_W: return "w"; 1273 default: 1274 FIXME("Unhandled channel source %#x\n", channel_source); 1275 return "undefined"; 1276 } 1277 } 1278 1279 struct 
color_fixup_masks 1280 { 1281 DWORD source; 1282 DWORD sign; 1283 }; 1284 1285 static struct color_fixup_masks calc_color_correction(struct color_fixup_desc fixup, DWORD dst_mask) 1286 { 1287 struct color_fixup_masks masks = {0, 0}; 1288 1289 if (is_complex_fixup(fixup)) 1290 { 1291 enum complex_fixup complex_fixup = get_complex_fixup(fixup); 1292 FIXME("Complex fixup (%#x) not supported\n", complex_fixup); 1293 return masks; 1294 } 1295 1296 if (fixup.x_source != CHANNEL_SOURCE_X) 1297 masks.source |= WINED3DSP_WRITEMASK_0; 1298 if (fixup.y_source != CHANNEL_SOURCE_Y) 1299 masks.source |= WINED3DSP_WRITEMASK_1; 1300 if (fixup.z_source != CHANNEL_SOURCE_Z) 1301 masks.source |= WINED3DSP_WRITEMASK_2; 1302 if (fixup.w_source != CHANNEL_SOURCE_W) 1303 masks.source |= WINED3DSP_WRITEMASK_3; 1304 masks.source &= dst_mask; 1305 1306 if (fixup.x_sign_fixup) 1307 masks.sign |= WINED3DSP_WRITEMASK_0; 1308 if (fixup.y_sign_fixup) 1309 masks.sign |= WINED3DSP_WRITEMASK_1; 1310 if (fixup.z_sign_fixup) 1311 masks.sign |= WINED3DSP_WRITEMASK_2; 1312 if (fixup.w_sign_fixup) 1313 masks.sign |= WINED3DSP_WRITEMASK_3; 1314 masks.sign &= dst_mask; 1315 1316 return masks; 1317 } 1318 1319 static void gen_color_correction(struct wined3d_string_buffer *buffer, const char *dst, 1320 const char *src, const char *one, const char *two, 1321 struct color_fixup_desc fixup, struct color_fixup_masks masks) 1322 { 1323 const char *sign_fixup_src = dst; 1324 1325 if (masks.source) 1326 { 1327 if (masks.sign) 1328 sign_fixup_src = "TA"; 1329 1330 shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", sign_fixup_src, src, 1331 shader_arb_get_fixup_swizzle(fixup.x_source), shader_arb_get_fixup_swizzle(fixup.y_source), 1332 shader_arb_get_fixup_swizzle(fixup.z_source), shader_arb_get_fixup_swizzle(fixup.w_source)); 1333 } 1334 else if (masks.sign) 1335 { 1336 sign_fixup_src = src; 1337 } 1338 1339 if (masks.sign) 1340 { 1341 char reg_mask[6]; 1342 char *ptr = reg_mask; 1343 1344 if (masks.sign != 
WINED3DSP_WRITEMASK_ALL) 1345 { 1346 *ptr++ = '.'; 1347 if (masks.sign & WINED3DSP_WRITEMASK_0) 1348 *ptr++ = 'x'; 1349 if (masks.sign & WINED3DSP_WRITEMASK_1) 1350 *ptr++ = 'y'; 1351 if (masks.sign & WINED3DSP_WRITEMASK_2) 1352 *ptr++ = 'z'; 1353 if (masks.sign & WINED3DSP_WRITEMASK_3) 1354 *ptr++ = 'w'; 1355 } 1356 *ptr = '\0'; 1357 1358 shader_addline(buffer, "MAD %s%s, %s, %s, -%s;\n", dst, reg_mask, sign_fixup_src, two, one); 1359 } 1360 } 1361 1362 static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins) 1363 { 1364 DWORD mod; 1365 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1366 if (!ins->dst_count) return ""; 1367 1368 mod = ins->dst[0].modifiers; 1369 1370 /* Silently ignore PARTIALPRECISION if it's not supported */ 1371 if(priv->target_version == ARB) mod &= ~WINED3DSPDM_PARTIALPRECISION; 1372 1373 if(mod & WINED3DSPDM_MSAMPCENTROID) 1374 { 1375 FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n"); 1376 mod &= ~WINED3DSPDM_MSAMPCENTROID; 1377 } 1378 1379 switch(mod) 1380 { 1381 case WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION: 1382 return "H_SAT"; 1383 1384 case WINED3DSPDM_SATURATE: 1385 return "_SAT"; 1386 1387 case WINED3DSPDM_PARTIALPRECISION: 1388 return "H"; 1389 1390 case 0: 1391 return ""; 1392 1393 default: 1394 FIXME("Unknown modifiers 0x%08x\n", mod); 1395 return ""; 1396 } 1397 } 1398 1399 #define TEX_PROJ 0x1 1400 #define TEX_BIAS 0x2 1401 #define TEX_LOD 0x4 1402 #define TEX_DERIV 0x10 1403 1404 static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx, 1405 const char *dst_str, const char *coord_reg, WORD flags, const char *dsx, const char *dsy) 1406 { 1407 enum wined3d_shader_resource_type resource_type = ins->ctx->reg_maps->resource_info[sampler_idx].type; 1408 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1409 const char *tex_type; 1410 BOOL np2_fixup = FALSE; 1411 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1412 const 
char *mod; 1413 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 1414 const struct wined3d_shader *shader; 1415 const struct wined3d_device *device; 1416 const struct wined3d_gl_info *gl_info; 1417 const char *tex_dst = dst_str; 1418 struct color_fixup_masks masks; 1419 1420 /* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */ 1421 if(!pshader) sampler_idx += MAX_FRAGMENT_SAMPLERS; 1422 1423 switch (resource_type) 1424 { 1425 case WINED3D_SHADER_RESOURCE_TEXTURE_1D: 1426 tex_type = "1D"; 1427 break; 1428 1429 case WINED3D_SHADER_RESOURCE_TEXTURE_2D: 1430 shader = ins->ctx->shader; 1431 device = shader->device; 1432 gl_info = &device->adapter->gl_info; 1433 1434 if (pshader && priv->cur_ps_args->super.np2_fixup & (1u << sampler_idx) 1435 && gl_info->supported[ARB_TEXTURE_RECTANGLE]) 1436 tex_type = "RECT"; 1437 else 1438 tex_type = "2D"; 1439 if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) 1440 { 1441 if (priv->cur_np2fixup_info->super.active & (1u << sampler_idx)) 1442 { 1443 if (flags) FIXME("Only ordinary sampling from NP2 textures is supported.\n"); 1444 else np2_fixup = TRUE; 1445 } 1446 } 1447 break; 1448 1449 case WINED3D_SHADER_RESOURCE_TEXTURE_3D: 1450 tex_type = "3D"; 1451 break; 1452 1453 case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE: 1454 tex_type = "CUBE"; 1455 break; 1456 1457 default: 1458 ERR("Unexpected resource type %#x.\n", resource_type); 1459 tex_type = ""; 1460 } 1461 1462 /* TEX, TXL, TXD and TXP do not support the "H" modifier, 1463 * so don't use shader_arb_get_modifier 1464 */ 1465 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) mod = "_SAT"; 1466 else mod = ""; 1467 1468 /* Fragment samplers always have indentity mapping */ 1469 if(sampler_idx >= MAX_FRAGMENT_SAMPLERS) 1470 { 1471 sampler_idx = priv->cur_vs_args->vertex.samplers[sampler_idx - MAX_FRAGMENT_SAMPLERS]; 1472 } 1473 1474 if (pshader) 1475 { 1476 masks = 
calc_color_correction(priv->cur_ps_args->super.color_fixup[sampler_idx], 1477 ins->dst[0].write_mask); 1478 1479 if (masks.source || masks.sign) 1480 tex_dst = "TA"; 1481 } 1482 1483 if (flags & TEX_DERIV) 1484 { 1485 if(flags & TEX_PROJ) FIXME("Projected texture sampling with custom derivatives\n"); 1486 if(flags & TEX_BIAS) FIXME("Biased texture sampling with custom derivatives\n"); 1487 shader_addline(buffer, "TXD%s %s, %s, %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, 1488 dsx, dsy, sampler_idx, tex_type); 1489 } 1490 else if(flags & TEX_LOD) 1491 { 1492 if(flags & TEX_PROJ) FIXME("Projected texture sampling with explicit lod\n"); 1493 if(flags & TEX_BIAS) FIXME("Biased texture sampling with explicit lod\n"); 1494 shader_addline(buffer, "TXL%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, 1495 sampler_idx, tex_type); 1496 } 1497 else if (flags & TEX_BIAS) 1498 { 1499 /* Shouldn't be possible, but let's check for it */ 1500 if(flags & TEX_PROJ) FIXME("Biased and Projected texture sampling\n"); 1501 /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */ 1502 shader_addline(buffer, "TXB%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type); 1503 } 1504 else if (flags & TEX_PROJ) 1505 { 1506 shader_addline(buffer, "TXP%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type); 1507 } 1508 else 1509 { 1510 if (np2_fixup) 1511 { 1512 const unsigned char idx = priv->cur_np2fixup_info->super.idx[sampler_idx]; 1513 shader_addline(buffer, "MUL TA, np2fixup[%u].%s, %s;\n", idx >> 1, 1514 (idx % 2) ? 
"zwxy" : "xyzw", coord_reg); 1515 1516 shader_addline(buffer, "TEX%s %s, TA, texture[%u], %s;\n", mod, tex_dst, sampler_idx, tex_type); 1517 } 1518 else 1519 shader_addline(buffer, "TEX%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type); 1520 } 1521 1522 if (pshader) 1523 { 1524 gen_color_correction(buffer, dst_str, tex_dst, 1525 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_ONE), 1526 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_TWO), 1527 priv->cur_ps_args->super.color_fixup[sampler_idx], masks); 1528 } 1529 } 1530 1531 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 1532 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr) 1533 { 1534 /* Generate a line that does the input modifier computation and return the input register to use */ 1535 BOOL is_color = FALSE, insert_line; 1536 char regstr[256]; 1537 char swzstr[20]; 1538 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1539 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1540 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 1541 const char *two = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_TWO); 1542 1543 /* Assume a new line will be added */ 1544 insert_line = TRUE; 1545 1546 /* Get register name */ 1547 shader_arb_get_register_name(ins, &src->reg, regstr, &is_color); 1548 shader_arb_get_swizzle(src, is_color, swzstr); 1549 1550 switch (src->modifiers) 1551 { 1552 case WINED3DSPSM_NONE: 1553 sprintf(outregstr, "%s%s", regstr, swzstr); 1554 insert_line = FALSE; 1555 break; 1556 case WINED3DSPSM_NEG: 1557 sprintf(outregstr, "-%s%s", regstr, swzstr); 1558 insert_line = FALSE; 1559 break; 1560 case WINED3DSPSM_BIAS: 1561 shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr); 1562 break; 1563 case WINED3DSPSM_BIASNEG: 1564 shader_addline(buffer, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg, regstr); 1565 break; 
1566 case WINED3DSPSM_SIGN: 1567 shader_addline(buffer, "MAD T%c, %s, %s, -%s;\n", 'A' + tmpreg, regstr, two, one); 1568 break; 1569 case WINED3DSPSM_SIGNNEG: 1570 shader_addline(buffer, "MAD T%c, %s, -%s, %s;\n", 'A' + tmpreg, regstr, two, one); 1571 break; 1572 case WINED3DSPSM_COMP: 1573 shader_addline(buffer, "SUB T%c, %s, %s;\n", 'A' + tmpreg, one, regstr); 1574 break; 1575 case WINED3DSPSM_X2: 1576 shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr); 1577 break; 1578 case WINED3DSPSM_X2NEG: 1579 shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr); 1580 break; 1581 case WINED3DSPSM_DZ: 1582 shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr); 1583 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 1584 break; 1585 case WINED3DSPSM_DW: 1586 shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr); 1587 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 1588 break; 1589 case WINED3DSPSM_ABS: 1590 if(ctx->target_version >= NV2) { 1591 sprintf(outregstr, "|%s%s|", regstr, swzstr); 1592 insert_line = FALSE; 1593 } else { 1594 shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr); 1595 } 1596 break; 1597 case WINED3DSPSM_ABSNEG: 1598 if(ctx->target_version >= NV2) { 1599 sprintf(outregstr, "-|%s%s|", regstr, swzstr); 1600 } else { 1601 shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr); 1602 sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr); 1603 } 1604 insert_line = FALSE; 1605 break; 1606 default: 1607 sprintf(outregstr, "%s%s", regstr, swzstr); 1608 insert_line = FALSE; 1609 } 1610 1611 /* Return modified or original register, with swizzle */ 1612 if (insert_line) 1613 sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr); 1614 } 1615 1616 static void pshader_hw_bem(const struct wined3d_shader_instruction *ins) 1617 { 1618 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1619 struct wined3d_string_buffer 
*buffer = ins->ctx->buffer; 1620 DWORD sampler_code = dst->reg.idx[0].offset; 1621 char dst_name[50]; 1622 char src_name[2][50]; 1623 1624 shader_arb_get_dst_param(ins, dst, dst_name); 1625 1626 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 1627 * 1628 * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid 1629 * with bem. So delay loading the first parameter until after the perturbation calculation which needs two 1630 * temps is done. 1631 */ 1632 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1633 shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code); 1634 shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]); 1635 shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code); 1636 shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]); 1637 1638 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1639 shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]); 1640 } 1641 1642 static DWORD negate_modifiers(DWORD mod, char *extra_char) 1643 { 1644 *extra_char = ' '; 1645 switch(mod) 1646 { 1647 case WINED3DSPSM_NONE: return WINED3DSPSM_NEG; 1648 case WINED3DSPSM_NEG: return WINED3DSPSM_NONE; 1649 case WINED3DSPSM_BIAS: return WINED3DSPSM_BIASNEG; 1650 case WINED3DSPSM_BIASNEG: return WINED3DSPSM_BIAS; 1651 case WINED3DSPSM_SIGN: return WINED3DSPSM_SIGNNEG; 1652 case WINED3DSPSM_SIGNNEG: return WINED3DSPSM_SIGN; 1653 case WINED3DSPSM_COMP: *extra_char = '-'; return WINED3DSPSM_COMP; 1654 case WINED3DSPSM_X2: return WINED3DSPSM_X2NEG; 1655 case WINED3DSPSM_X2NEG: return WINED3DSPSM_X2; 1656 case WINED3DSPSM_DZ: *extra_char = '-'; return WINED3DSPSM_DZ; 1657 case WINED3DSPSM_DW: *extra_char = '-'; return WINED3DSPSM_DW; 1658 case WINED3DSPSM_ABS: return WINED3DSPSM_ABSNEG; 1659 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 1660 } 1661 FIXME("Unknown modifier %u\n", mod); 1662 
return mod; 1663 } 1664 1665 static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins) 1666 { 1667 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1668 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1669 char dst_name[50]; 1670 char src_name[3][50]; 1671 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 1672 ins->ctx->reg_maps->shader_version.minor); 1673 1674 shader_arb_get_dst_param(ins, dst, dst_name); 1675 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1676 1677 if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue 1678 && ins->dst->write_mask != WINED3DSP_WRITEMASK_3) 1679 { 1680 shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]); 1681 } 1682 else 1683 { 1684 struct wined3d_shader_src_param src0_copy = ins->src[0]; 1685 char extra_neg; 1686 1687 /* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */ 1688 src0_copy.modifiers = negate_modifiers(src0_copy.modifiers, &extra_neg); 1689 1690 shader_arb_get_src_param(ins, &src0_copy, 0, src_name[0]); 1691 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 1692 shader_addline(buffer, "ADD TA, %c%s, coefdiv.x;\n", extra_neg, src_name[0]); 1693 shader_addline(buffer, "CMP%s %s, TA, %s, %s;\n", shader_arb_get_modifier(ins), 1694 dst_name, src_name[1], src_name[2]); 1695 } 1696 } 1697 1698 static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins) 1699 { 1700 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1701 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1702 char dst_name[50]; 1703 char src_name[3][50]; 1704 1705 shader_arb_get_dst_param(ins, dst, dst_name); 1706 1707 /* Generate input register names (with modifiers) */ 1708 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1709 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1710 shader_arb_get_src_param(ins, &ins->src[2], 2, 
src_name[2]); 1711 1712 shader_addline(buffer, "CMP%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1713 dst_name, src_name[0], src_name[2], src_name[1]); 1714 } 1715 1716 /** Process the WINED3DSIO_DP2ADD instruction in ARB. 1717 * dst = dot2(src0, src1) + src2 */ 1718 static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins) 1719 { 1720 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1721 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1722 char dst_name[50]; 1723 char src_name[3][50]; 1724 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1725 1726 shader_arb_get_dst_param(ins, dst, dst_name); 1727 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1728 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 1729 1730 if(ctx->target_version >= NV3) 1731 { 1732 /* GL_NV_fragment_program2 has a 1:1 matching instruction */ 1733 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1734 shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1735 dst_name, src_name[0], src_name[1], src_name[2]); 1736 } 1737 else if(ctx->target_version >= NV2) 1738 { 1739 /* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y 1740 * dst.y = src2.?, src0.x, src1.z + src0.y * src1.w 1741 * dst.z = src2.?, src0.x, src1.x + src0.y * src1.y 1742 * dst.z = src2.?, src0.x, src1.z + src0.y * src1.w 1743 * 1744 * Make sure that src1.zw = src1.xy, then we get a classic dp2add 1745 * 1746 * .xyxy and other swizzles that we could get with this are not valid in 1747 * plain ARBfp, but luckily the NV extension grammar lifts this limitation. 
1748 */ 1749 struct wined3d_shader_src_param tmp_param = ins->src[1]; 1750 DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */ 1751 tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */ 1752 1753 shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]); 1754 1755 shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1756 dst_name, src_name[2], src_name[0], src_name[1]); 1757 } 1758 else 1759 { 1760 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1761 /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2] 1762 * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite 1763 */ 1764 shader_addline(buffer, "MOV TA, %s;\n", src_name[0]); 1765 shader_addline(buffer, "MOV TA.z, 0.0;\n"); 1766 shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]); 1767 shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]); 1768 } 1769 } 1770 1771 /* Map the opcode 1-to-1 to the GL code */ 1772 static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins) 1773 { 1774 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1775 const char *instruction; 1776 char arguments[256], dst_str[50]; 1777 unsigned int i; 1778 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1779 1780 switch (ins->handler_idx) 1781 { 1782 case WINED3DSIH_ABS: instruction = "ABS"; break; 1783 case WINED3DSIH_ADD: instruction = "ADD"; break; 1784 case WINED3DSIH_CRS: instruction = "XPD"; break; 1785 case WINED3DSIH_DP3: instruction = "DP3"; break; 1786 case WINED3DSIH_DP4: instruction = "DP4"; break; 1787 case WINED3DSIH_DST: instruction = "DST"; break; 1788 case WINED3DSIH_FRC: instruction = "FRC"; break; 1789 case WINED3DSIH_LIT: instruction = "LIT"; break; 1790 case WINED3DSIH_LRP: instruction = "LRP"; break; 1791 case WINED3DSIH_MAD: instruction = "MAD"; break; 1792 case WINED3DSIH_MAX: instruction = "MAX"; 
break; 1793 case WINED3DSIH_MIN: instruction = "MIN"; break; 1794 case WINED3DSIH_MOV: instruction = "MOV"; break; 1795 case WINED3DSIH_MUL: instruction = "MUL"; break; 1796 case WINED3DSIH_SGE: instruction = "SGE"; break; 1797 case WINED3DSIH_SLT: instruction = "SLT"; break; 1798 case WINED3DSIH_SUB: instruction = "SUB"; break; 1799 case WINED3DSIH_MOVA:instruction = "ARR"; break; 1800 case WINED3DSIH_DSX: instruction = "DDX"; break; 1801 default: instruction = ""; 1802 FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 1803 break; 1804 } 1805 1806 /* Note that shader_arb_add_dst_param() adds spaces. */ 1807 arguments[0] = '\0'; 1808 shader_arb_get_dst_param(ins, dst, dst_str); 1809 for (i = 0; i < ins->src_count; ++i) 1810 { 1811 char operand[100]; 1812 strcat(arguments, ", "); 1813 shader_arb_get_src_param(ins, &ins->src[i], i, operand); 1814 strcat(arguments, operand); 1815 } 1816 shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments); 1817 } 1818 1819 static void shader_hw_nop(const struct wined3d_shader_instruction *ins) {} 1820 1821 static DWORD shader_arb_select_component(DWORD swizzle, DWORD component) 1822 { 1823 return ((swizzle >> 2 * component) & 0x3) * 0x55; 1824 } 1825 1826 static void shader_hw_mov(const struct wined3d_shader_instruction *ins) 1827 { 1828 const struct wined3d_shader *shader = ins->ctx->shader; 1829 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 1830 BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type); 1831 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1832 const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO); 1833 const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE); 1834 const char *two = arb_get_helper_value(reg_maps->shader_version.type, ARB_TWO); 1835 1836 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1837 char src0_param[256]; 
1838 1839 if (ins->handler_idx == WINED3DSIH_MOVA) 1840 { 1841 const struct arb_vshader_private *shader_data = shader->backend_data; 1842 char write_mask[6]; 1843 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 1844 1845 if(ctx->target_version >= NV2) { 1846 shader_hw_map2gl(ins); 1847 return; 1848 } 1849 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param); 1850 shader_arb_get_write_mask(ins, &ins->dst[0], write_mask); 1851 1852 /* This implements the mova formula used in GLSL. The first two instructions 1853 * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0 1854 * in this case: 1855 * mova A0.x, 0.0 1856 * 1857 * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor 1858 * 1859 * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into 1860 * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign. 1861 */ 1862 shader_addline(buffer, "SGE A0_SHADOW%s, %s, %s;\n", write_mask, src0_param, zero); 1863 shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, %s, -%s;\n", write_mask, two, one); 1864 1865 shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param); 1866 shader_addline(buffer, "ADD TA%s, TA, rel_addr_const.x;\n", write_mask); 1867 shader_addline(buffer, "FLR TA%s, TA;\n", write_mask); 1868 if (shader_data->rel_offset) 1869 { 1870 shader_addline(buffer, "ADD TA%s, TA, %s;\n", write_mask, offset); 1871 } 1872 shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask); 1873 1874 ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0'; 1875 } 1876 else if (reg_maps->shader_version.major == 1 1877 && !shader_is_pshader_version(reg_maps->shader_version.type) 1878 && ins->dst[0].reg.type == WINED3DSPR_ADDR) 1879 { 1880 const struct arb_vshader_private *shader_data = shader->backend_data; 1881 src0_param[0] = '\0'; 1882 1883 if (shader_data->rel_offset && 
ctx->target_version == ARB) 1884 { 1885 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 1886 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param); 1887 shader_addline(buffer, "ADD TA.x, %s, %s;\n", src0_param, offset); 1888 shader_addline(buffer, "ARL A0.x, TA.x;\n"); 1889 } 1890 else 1891 { 1892 /* Apple's ARB_vertex_program implementation does not accept an ARL source argument 1893 * with more than one component. Thus replicate the first source argument over all 1894 * 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */ 1895 struct wined3d_shader_src_param tmp_src = ins->src[0]; 1896 tmp_src.swizzle = shader_arb_select_component(tmp_src.swizzle, 0); 1897 shader_arb_get_src_param(ins, &tmp_src, 0, src0_param); 1898 shader_addline(buffer, "ARL A0.x, %s;\n", src0_param); 1899 } 1900 } 1901 else if (ins->dst[0].reg.type == WINED3DSPR_COLOROUT && !ins->dst[0].reg.idx[0].offset && pshader) 1902 { 1903 if (ctx->ps_post_process && shader->u.ps.color0_mov) 1904 { 1905 shader_addline(buffer, "#mov handled in srgb write or fog code\n"); 1906 return; 1907 } 1908 shader_hw_map2gl(ins); 1909 } 1910 else 1911 { 1912 shader_hw_map2gl(ins); 1913 } 1914 } 1915 1916 static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins) 1917 { 1918 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1919 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 1920 char reg_dest[40]; 1921 1922 /* No swizzles are allowed in d3d's texkill. 
PS 1.x ignores the 4th component as documented, 1923 * but >= 2.0 honors it (undocumented, but tested by the d3d9 testsuite) 1924 */ 1925 shader_arb_get_dst_param(ins, dst, reg_dest); 1926 1927 if (ins->ctx->reg_maps->shader_version.major >= 2) 1928 { 1929 const char *kilsrc = "TA"; 1930 BOOL is_color; 1931 1932 shader_arb_get_register_name(ins, &dst->reg, reg_dest, &is_color); 1933 if(dst->write_mask == WINED3DSP_WRITEMASK_ALL) 1934 { 1935 kilsrc = reg_dest; 1936 } 1937 else 1938 { 1939 /* Sigh. KIL doesn't support swizzles/writemasks. KIL passes a writemask, but ".xy" for example 1940 * is not valid as a swizzle in ARB (needs ".xyyy"). Use SWZ to load the register properly, and set 1941 * masked out components to 0(won't kill) 1942 */ 1943 char x = '0', y = '0', z = '0', w = '0'; 1944 if(dst->write_mask & WINED3DSP_WRITEMASK_0) x = 'x'; 1945 if(dst->write_mask & WINED3DSP_WRITEMASK_1) y = 'y'; 1946 if(dst->write_mask & WINED3DSP_WRITEMASK_2) z = 'z'; 1947 if(dst->write_mask & WINED3DSP_WRITEMASK_3) w = 'w'; 1948 shader_addline(buffer, "SWZ TA, %s, %c, %c, %c, %c;\n", reg_dest, x, y, z, w); 1949 } 1950 shader_addline(buffer, "KIL %s;\n", kilsrc); 1951 } 1952 else 1953 { 1954 /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component, 1955 * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL 1956 * 1957 * ps_1_3 shaders use the texcoord incarnation of the Tx register. 
ps_1_4 shaders can use the same, 1958 * or pass in any temporary register(in shader phase 2) 1959 */ 1960 if (ins->ctx->reg_maps->shader_version.minor <= 3) 1961 sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx[0].offset); 1962 else 1963 shader_arb_get_dst_param(ins, dst, reg_dest); 1964 shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest); 1965 shader_addline(buffer, "KIL TA;\n"); 1966 } 1967 } 1968 1969 static void pshader_hw_tex(const struct wined3d_shader_instruction *ins) 1970 { 1971 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1972 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1973 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 1974 ins->ctx->reg_maps->shader_version.minor); 1975 struct wined3d_shader_src_param src; 1976 1977 char reg_dest[40]; 1978 char reg_coord[40]; 1979 DWORD reg_sampler_code; 1980 WORD myflags = 0; 1981 BOOL swizzle_coord = FALSE; 1982 1983 /* All versions have a destination register */ 1984 shader_arb_get_dst_param(ins, dst, reg_dest); 1985 1986 /* 1.0-1.4: Use destination register number as texture code. 1987 2.0+: Use provided sampler number as texture code. */ 1988 if (shader_version < WINED3D_SHADER_VERSION(2,0)) 1989 reg_sampler_code = dst->reg.idx[0].offset; 1990 else 1991 reg_sampler_code = ins->src[1].reg.idx[0].offset; 1992 1993 /* 1.0-1.3: Use the texcoord varying. 1994 1.4+: Use provided coordinate source register. 
*/ 1995 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 1996 sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code); 1997 else { 1998 /* TEX is the only instruction that can handle DW and DZ natively */ 1999 src = ins->src[0]; 2000 if(src.modifiers == WINED3DSPSM_DW) src.modifiers = WINED3DSPSM_NONE; 2001 if(src.modifiers == WINED3DSPSM_DZ) src.modifiers = WINED3DSPSM_NONE; 2002 shader_arb_get_src_param(ins, &src, 0, reg_coord); 2003 } 2004 2005 /* projection flag: 2006 * 1.1, 1.2, 1.3: Use WINED3D_TSS_TEXTURETRANSFORMFLAGS 2007 * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0] 2008 * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode 2009 */ 2010 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 2011 { 2012 DWORD flags = 0; 2013 if (reg_sampler_code < MAX_TEXTURES) 2014 flags = priv->cur_ps_args->super.tex_transform >> reg_sampler_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 2015 if (flags & WINED3D_PSARGS_PROJECTED) 2016 { 2017 myflags |= TEX_PROJ; 2018 if ((flags & ~WINED3D_PSARGS_PROJECTED) == WINED3D_TTFF_COUNT3) 2019 swizzle_coord = TRUE; 2020 } 2021 } 2022 else if (shader_version < WINED3D_SHADER_VERSION(2,0)) 2023 { 2024 enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers; 2025 if (src_mod == WINED3DSPSM_DZ) 2026 { 2027 swizzle_coord = TRUE; 2028 myflags |= TEX_PROJ; 2029 } else if(src_mod == WINED3DSPSM_DW) { 2030 myflags |= TEX_PROJ; 2031 } 2032 } else { 2033 if (ins->flags & WINED3DSI_TEXLD_PROJECT) myflags |= TEX_PROJ; 2034 if (ins->flags & WINED3DSI_TEXLD_BIAS) myflags |= TEX_BIAS; 2035 } 2036 2037 if (swizzle_coord) 2038 { 2039 /* TXP cannot handle DZ natively, so move the z coordinate to .w. 
2040 * reg_coord is a read-only varying register, so we need a temp reg */ 2041 shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord); 2042 strcpy(reg_coord, "TA"); 2043 } 2044 2045 shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, myflags, NULL, NULL); 2046 } 2047 2048 static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins) 2049 { 2050 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2051 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2052 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 2053 ins->ctx->reg_maps->shader_version.minor); 2054 char dst_str[50]; 2055 2056 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 2057 { 2058 DWORD reg = dst->reg.idx[0].offset; 2059 2060 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2061 shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg); 2062 } else { 2063 char reg_src[40]; 2064 2065 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src); 2066 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2067 shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src); 2068 } 2069 } 2070 2071 static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins) 2072 { 2073 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2074 DWORD flags = 0; 2075 2076 DWORD reg1 = ins->dst[0].reg.idx[0].offset; 2077 char dst_str[50]; 2078 char src_str[50]; 2079 2080 /* Note that texreg2ar treats Tx as a temporary register, not as a varying */ 2081 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2082 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2083 /* Move .x first in case src_str is "TA" */ 2084 shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str); 2085 shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str); 2086 if (reg1 < MAX_TEXTURES) 2087 { 2088 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2089 flags = priv->cur_ps_args->super.tex_transform >> reg1 * 
WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 2090 } 2091 shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2092 } 2093 2094 static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins) 2095 { 2096 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2097 2098 DWORD reg1 = ins->dst[0].reg.idx[0].offset; 2099 char dst_str[50]; 2100 char src_str[50]; 2101 2102 /* Note that texreg2gb treats Tx as a temporary register, not as a varying */ 2103 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2104 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2105 shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str); 2106 shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str); 2107 shader_hw_sample(ins, reg1, dst_str, "TA", 0, NULL, NULL); 2108 } 2109 2110 static void pshader_hw_texreg2rgb(const struct wined3d_shader_instruction *ins) 2111 { 2112 DWORD reg1 = ins->dst[0].reg.idx[0].offset; 2113 char dst_str[50]; 2114 char src_str[50]; 2115 2116 /* Note that texreg2rg treats Tx as a temporary register, not as a varying */ 2117 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2118 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2119 shader_hw_sample(ins, reg1, dst_str, src_str, 0, NULL, NULL); 2120 } 2121 2122 static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins) 2123 { 2124 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2125 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2126 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2127 char reg_coord[40], dst_reg[50], src_reg[50]; 2128 DWORD reg_dest_code; 2129 2130 /* All versions have a destination register. 
The Tx where the texture coordinates come 2131 * from is the varying incarnation of the texture register 2132 */ 2133 reg_dest_code = dst->reg.idx[0].offset; 2134 shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg); 2135 shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg); 2136 sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code); 2137 2138 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 2139 * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register 2140 * 2141 * GL_NV_fragment_program_option could handle this in one instruction via X2D: 2142 * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw 2143 * 2144 * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that 2145 * comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV 2146 * extension. 2147 */ 2148 shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code); 2149 shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg); 2150 shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code); 2151 shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg); 2152 2153 /* with projective textures, texbem only divides the static texture coord, not the displacement, 2154 * so we can't let the GL handle this. 
2155 */ 2156 if ((priv->cur_ps_args->super.tex_transform >> reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT) 2157 & WINED3D_PSARGS_PROJECTED) 2158 { 2159 shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord); 2160 shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord); 2161 shader_addline(buffer, "ADD TA.xy, TA, TB;\n"); 2162 } else { 2163 shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord); 2164 } 2165 2166 shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", 0, NULL, NULL); 2167 2168 if (ins->handler_idx == WINED3DSIH_TEXBEML) 2169 { 2170 /* No src swizzles are allowed, so this is ok */ 2171 shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n", 2172 src_reg, reg_dest_code, reg_dest_code); 2173 shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg); 2174 } 2175 } 2176 2177 static void pshader_hw_texm3x2pad(const struct wined3d_shader_instruction *ins) 2178 { 2179 DWORD reg = ins->dst[0].reg.idx[0].offset; 2180 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2181 char src0_name[50], dst_name[50]; 2182 BOOL is_color; 2183 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 2184 2185 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2186 /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized 2187 * T<reg+1> register. 
Use this register to store the calculated vector 2188 */ 2189 tmp_reg.idx[0].offset = reg + 1; 2190 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 2191 shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 2192 } 2193 2194 static void pshader_hw_texm3x2tex(const struct wined3d_shader_instruction *ins) 2195 { 2196 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2197 DWORD flags; 2198 DWORD reg = ins->dst[0].reg.idx[0].offset; 2199 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2200 char dst_str[50]; 2201 char src0_name[50]; 2202 char dst_reg[50]; 2203 BOOL is_color; 2204 2205 /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */ 2206 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2207 2208 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2209 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2210 shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2211 flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2212 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2213 } 2214 2215 static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins) 2216 { 2217 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2218 DWORD reg = ins->dst[0].reg.idx[0].offset; 2219 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2220 char src0_name[50], dst_name[50]; 2221 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 2222 BOOL is_color; 2223 2224 /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with 2225 * incrementing ins->dst[0].register_idx numbers. 
So the pad instruction already knows the final destination 2226 * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared') 2227 */ 2228 tmp_reg.idx[0].offset = reg + 2 - tex_mx->current_row; 2229 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 2230 2231 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2232 shader_addline(buffer, "DP3 %s.%c, fragment.texcoord[%u], %s;\n", 2233 dst_name, 'x' + tex_mx->current_row, reg, src0_name); 2234 tex_mx->texcoord_w[tex_mx->current_row++] = reg; 2235 } 2236 2237 static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins) 2238 { 2239 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2240 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2241 DWORD flags; 2242 DWORD reg = ins->dst[0].reg.idx[0].offset; 2243 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2244 char dst_str[50]; 2245 char src0_name[50], dst_name[50]; 2246 BOOL is_color; 2247 2248 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2249 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2250 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 2251 2252 /* Sample the texture using the calculated coordinates */ 2253 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2254 flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2255 shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3D_PSARGS_PROJECTED ? 
TEX_PROJ : 0, NULL, NULL); 2256 tex_mx->current_row = 0; 2257 } 2258 2259 static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins) 2260 { 2261 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2262 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2263 DWORD flags; 2264 DWORD reg = ins->dst[0].reg.idx[0].offset; 2265 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2266 char dst_str[50]; 2267 char src0_name[50]; 2268 char dst_reg[50]; 2269 BOOL is_color; 2270 2271 /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all 2272 * components for temporary data storage 2273 */ 2274 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2275 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2276 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2277 2278 /* Construct the eye-ray vector from w coordinates */ 2279 shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[0]); 2280 shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[1]); 2281 shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg); 2282 2283 /* Calculate reflection vector 2284 */ 2285 shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg); 2286 /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */ 2287 shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg); 2288 shader_addline(buffer, "RCP TB.w, TB.w;\n"); 2289 shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg); 2290 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 2291 shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg); 2292 2293 /* Sample the texture using the calculated coordinates */ 2294 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2295 flags = reg < MAX_TEXTURES ? 
priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2296 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2297 tex_mx->current_row = 0; 2298 } 2299 2300 static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins) 2301 { 2302 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2303 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 2304 DWORD flags; 2305 DWORD reg = ins->dst[0].reg.idx[0].offset; 2306 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2307 char dst_str[50]; 2308 char src0_name[50]; 2309 char src1_name[50]; 2310 char dst_reg[50]; 2311 BOOL is_color; 2312 2313 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2314 shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name); 2315 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2316 /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */ 2317 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2318 2319 /* Calculate reflection vector. 2320 * 2321 * dot(N, E) 2322 * dst_reg.xyz = 2 * --------- * N - E 2323 * dot(N, N) 2324 * 2325 * Which normalizes the normal vector 2326 */ 2327 shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name); 2328 shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg); 2329 shader_addline(buffer, "RCP TC.w, TC.w;\n"); 2330 shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg); 2331 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 2332 shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name); 2333 2334 /* Sample the texture using the calculated coordinates */ 2335 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2336 flags = reg < MAX_TEXTURES ? 
priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 2337 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2338 tex_mx->current_row = 0; 2339 } 2340 2341 static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins) 2342 { 2343 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2344 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2345 char dst_name[50]; 2346 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2347 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2348 2349 /* texdepth has an implicit destination, the fragment depth value. It's only parameter, 2350 * which is essentially an input, is the destination register because it is the first 2351 * parameter. According to the msdn, this must be register r5, but let's keep it more flexible 2352 * here(writemasks/swizzles are not valid on texdepth) 2353 */ 2354 shader_arb_get_dst_param(ins, dst, dst_name); 2355 2356 /* According to the msdn, the source register(must be r5) is unusable after 2357 * the texdepth instruction, so we're free to modify it 2358 */ 2359 shader_addline(buffer, "MIN %s.y, %s.y, %s;\n", dst_name, dst_name, one); 2360 2361 /* How to deal with the special case dst_name.g == 0? if r != 0, then 2362 * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct 2363 * result. But if r = 0.0, then 0 * inf = 0, which is incorrect. 
2364 */ 2365 shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name); 2366 shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name); 2367 shader_addline(buffer, "MIN TA.x, TA.x, %s;\n", one); 2368 shader_addline(buffer, "MAX result.depth, TA.x, %s;\n", zero); 2369 } 2370 2371 /** Process the WINED3DSIO_TEXDP3TEX instruction in ARB: 2372 * Take a 3-component dot product of the TexCoord[dstreg] and src, 2373 * then perform a 1D texture lookup from stage dstregnum, place into dst. */ 2374 static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins) 2375 { 2376 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2377 DWORD sampler_idx = ins->dst[0].reg.idx[0].offset; 2378 char src0[50]; 2379 char dst_str[50]; 2380 2381 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2382 shader_addline(buffer, "MOV TB, 0.0;\n"); 2383 shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0); 2384 2385 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2386 shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL); 2387 } 2388 2389 /** Process the WINED3DSIO_TEXDP3 instruction in ARB: 2390 * Take a 3-component dot product of the TexCoord[dstreg] and src. 
*/ 2391 static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins) 2392 { 2393 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2394 char src0[50]; 2395 char dst_str[50]; 2396 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2397 2398 /* Handle output register */ 2399 shader_arb_get_dst_param(ins, dst, dst_str); 2400 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2401 shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx[0].offset, src0); 2402 } 2403 2404 /** Process the WINED3DSIO_TEXM3X3 instruction in ARB 2405 * Perform the 3rd row of a 3x3 matrix multiply */ 2406 static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins) 2407 { 2408 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2409 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2410 char dst_str[50], dst_name[50]; 2411 char src0[50]; 2412 BOOL is_color; 2413 2414 shader_arb_get_dst_param(ins, dst, dst_str); 2415 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2416 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2417 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0); 2418 shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name); 2419 } 2420 2421 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB: 2422 * Last row of a 3x2 matrix multiply, use the result to calculate the depth: 2423 * Calculate tmp0.y = TexCoord[dstreg] . src.xyz; (tmp0.x has already been calculated) 2424 * depth = (tmp0.y == 0.0) ? 
1.0 : tmp0.x / tmp0.y
 *
 * The emitted code computes x * (1 / y) and clamps the result to the
 * [0, 1] range before writing it to result.depth.
 */
static void pshader_hw_texm3x2depth(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *out = ins->ctx->buffer;
    const struct wined3d_shader_dst_param *target = &ins->dst[0];
    const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
    const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
    char row_src[50];
    char tmp_name[50];
    BOOL is_color;

    shader_arb_get_src_param(ins, &ins->src[0], 0, row_src);
    shader_arb_get_register_name(ins, &target->reg, tmp_name, &is_color);

    /* Second matrix row: the result lands in the .y component. */
    shader_addline(out, "DP3 %s.y, fragment.texcoord[%u], %s;\n",
            tmp_name, target->reg.idx[0].offset, row_src);

    /* How to deal with the special case dst_name.g == 0? if r != 0, then
     * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
     * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
     */
    shader_addline(out, "RCP %s.y, %s.y;\n", tmp_name, tmp_name);
    shader_addline(out, "MUL %s.x, %s.x, %s.y;\n", tmp_name, tmp_name, tmp_name);

    /* Clamp to [0, 1] and write the depth. */
    shader_addline(out, "MIN %s.x, %s.x, %s;\n", tmp_name, tmp_name, one);
    shader_addline(out, "MAX result.depth, %s.x, %s;\n", tmp_name, zero);
}

/** Handles transforming all WINED3DSIO_M?x?
opcodes for 2450 Vertex/Pixel shaders to ARB_vertex_program codes */ 2451 static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins) 2452 { 2453 int i; 2454 int nComponents = 0; 2455 struct wined3d_shader_dst_param tmp_dst = {{0}}; 2456 struct wined3d_shader_src_param tmp_src[2] = {{{0}}}; 2457 struct wined3d_shader_instruction tmp_ins; 2458 2459 memset(&tmp_ins, 0, sizeof(tmp_ins)); 2460 2461 /* Set constants for the temporary argument */ 2462 tmp_ins.ctx = ins->ctx; 2463 tmp_ins.dst_count = 1; 2464 tmp_ins.dst = &tmp_dst; 2465 tmp_ins.src_count = 2; 2466 tmp_ins.src = tmp_src; 2467 2468 switch(ins->handler_idx) 2469 { 2470 case WINED3DSIH_M4x4: 2471 nComponents = 4; 2472 tmp_ins.handler_idx = WINED3DSIH_DP4; 2473 break; 2474 case WINED3DSIH_M4x3: 2475 nComponents = 3; 2476 tmp_ins.handler_idx = WINED3DSIH_DP4; 2477 break; 2478 case WINED3DSIH_M3x4: 2479 nComponents = 4; 2480 tmp_ins.handler_idx = WINED3DSIH_DP3; 2481 break; 2482 case WINED3DSIH_M3x3: 2483 nComponents = 3; 2484 tmp_ins.handler_idx = WINED3DSIH_DP3; 2485 break; 2486 case WINED3DSIH_M3x2: 2487 nComponents = 2; 2488 tmp_ins.handler_idx = WINED3DSIH_DP3; 2489 break; 2490 default: 2491 FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 2492 break; 2493 } 2494 2495 tmp_dst = ins->dst[0]; 2496 tmp_src[0] = ins->src[0]; 2497 tmp_src[1] = ins->src[1]; 2498 for (i = 0; i < nComponents; ++i) 2499 { 2500 tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i; 2501 shader_hw_map2gl(&tmp_ins); 2502 ++tmp_src[1].reg.idx[0].offset; 2503 } 2504 } 2505 2506 static DWORD abs_modifier(DWORD mod, BOOL *need_abs) 2507 { 2508 *need_abs = FALSE; 2509 2510 switch(mod) 2511 { 2512 case WINED3DSPSM_NONE: return WINED3DSPSM_ABS; 2513 case WINED3DSPSM_NEG: return WINED3DSPSM_ABS; 2514 case WINED3DSPSM_BIAS: *need_abs = TRUE; return WINED3DSPSM_BIAS; 2515 case WINED3DSPSM_BIASNEG: *need_abs = TRUE; return WINED3DSPSM_BIASNEG; 2516 case WINED3DSPSM_SIGN: *need_abs = TRUE; return 
WINED3DSPSM_SIGN; 2517 case WINED3DSPSM_SIGNNEG: *need_abs = TRUE; return WINED3DSPSM_SIGNNEG; 2518 case WINED3DSPSM_COMP: *need_abs = TRUE; return WINED3DSPSM_COMP; 2519 case WINED3DSPSM_X2: *need_abs = TRUE; return WINED3DSPSM_X2; 2520 case WINED3DSPSM_X2NEG: *need_abs = TRUE; return WINED3DSPSM_X2NEG; 2521 case WINED3DSPSM_DZ: *need_abs = TRUE; return WINED3DSPSM_DZ; 2522 case WINED3DSPSM_DW: *need_abs = TRUE; return WINED3DSPSM_DW; 2523 case WINED3DSPSM_ABS: return WINED3DSPSM_ABS; 2524 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 2525 } 2526 FIXME("Unknown modifier %u\n", mod); 2527 return mod; 2528 } 2529 2530 static void shader_hw_scalar_op(const struct wined3d_shader_instruction *ins) 2531 { 2532 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2533 const char *instruction; 2534 struct wined3d_shader_src_param src0_copy = ins->src[0]; 2535 BOOL need_abs = FALSE; 2536 2537 char dst[50]; 2538 char src[50]; 2539 2540 switch(ins->handler_idx) 2541 { 2542 case WINED3DSIH_RSQ: instruction = "RSQ"; break; 2543 case WINED3DSIH_RCP: instruction = "RCP"; break; 2544 case WINED3DSIH_EXPP: 2545 if (ins->ctx->reg_maps->shader_version.major < 2) 2546 { 2547 instruction = "EXP"; 2548 break; 2549 } 2550 /* Drop through. */ 2551 case WINED3DSIH_EXP: 2552 instruction = "EX2"; 2553 break; 2554 case WINED3DSIH_LOG: 2555 case WINED3DSIH_LOGP: 2556 /* The precision requirements suggest that LOGP matches ARBvp's LOG 2557 * instruction, but notice that the output of those instructions is 2558 * different. */ 2559 src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs); 2560 instruction = "LG2"; 2561 break; 2562 default: instruction = ""; 2563 FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 2564 break; 2565 } 2566 2567 /* Dx sdk says .x is used if no swizzle is given, but our test shows that 2568 * .w is used. 
*/ 2569 src0_copy.swizzle = shader_arb_select_component(src0_copy.swizzle, 3); 2570 2571 shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */ 2572 shader_arb_get_src_param(ins, &src0_copy, 0, src); 2573 2574 if(need_abs) 2575 { 2576 shader_addline(buffer, "ABS TA.w, %s;\n", src); 2577 shader_addline(buffer, "%s%s %s, TA.w;\n", instruction, shader_arb_get_modifier(ins), dst); 2578 } 2579 else 2580 { 2581 shader_addline(buffer, "%s%s %s, %s;\n", instruction, shader_arb_get_modifier(ins), dst, src); 2582 } 2583 2584 } 2585 2586 static void shader_hw_nrm(const struct wined3d_shader_instruction *ins) 2587 { 2588 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2589 char dst_name[50]; 2590 char src_name[50]; 2591 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2592 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 2593 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2594 2595 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2596 shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name); 2597 2598 /* In D3D, NRM of a vector with length zero returns zero. 
Catch this situation, as 2599 * otherwise NRM or RSQ would return NaN */ 2600 if(pshader && priv->target_version >= NV3) 2601 { 2602 /* GL_NV_fragment_program2's NRM needs protection against length zero vectors too 2603 * 2604 * TODO: Find out if DP3+NRM+MOV is really faster than DP3+RSQ+MUL 2605 */ 2606 shader_addline(buffer, "DP3C TA, %s, %s;\n", src_name, src_name); 2607 shader_addline(buffer, "NRM%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name); 2608 shader_addline(buffer, "MOV %s (EQ), %s;\n", dst_name, zero); 2609 } 2610 else if(priv->target_version >= NV2) 2611 { 2612 shader_addline(buffer, "DP3C TA.x, %s, %s;\n", src_name, src_name); 2613 shader_addline(buffer, "RSQ TA.x (NE), TA.x;\n"); 2614 shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name, 2615 src_name); 2616 } 2617 else 2618 { 2619 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2620 2621 shader_addline(buffer, "DP3 TA.x, %s, %s;\n", src_name, src_name); 2622 /* Pass any non-zero value to RSQ if the input vector has a length of zero. The 2623 * RSQ result doesn't matter, as long as multiplying it by 0 returns 0. 
2624 */ 2625 shader_addline(buffer, "SGE TA.y, -TA.x, %s;\n", zero); 2626 shader_addline(buffer, "MAD TA.x, %s, TA.y, TA.x;\n", one); 2627 2628 shader_addline(buffer, "RSQ TA.x, TA.x;\n"); 2629 /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/ 2630 shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name, 2631 src_name); 2632 } 2633 } 2634 2635 static void shader_hw_lrp(const struct wined3d_shader_instruction *ins) 2636 { 2637 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2638 char dst_name[50]; 2639 char src_name[3][50]; 2640 2641 /* ARB_fragment_program has a convenient LRP instruction */ 2642 if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) { 2643 shader_hw_map2gl(ins); 2644 return; 2645 } 2646 2647 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2648 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 2649 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 2650 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 2651 2652 shader_addline(buffer, "SUB TA, %s, %s;\n", src_name[1], src_name[2]); 2653 shader_addline(buffer, "MAD%s %s, %s, TA, %s;\n", shader_arb_get_modifier(ins), 2654 dst_name, src_name[0], src_name[2]); 2655 } 2656 2657 static void shader_hw_sincos(const struct wined3d_shader_instruction *ins) 2658 { 2659 /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which 2660 * must contain fixed constants. 
So we need a separate function to filter those constants and 2661 * can't use map2gl 2662 */ 2663 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2664 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2665 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2666 char dst_name[50]; 2667 char src_name0[50], src_name1[50], src_name2[50]; 2668 BOOL is_color; 2669 2670 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 2671 if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) { 2672 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2673 /* No modifiers are supported on SCS */ 2674 shader_addline(buffer, "SCS %s, %s;\n", dst_name, src_name0); 2675 2676 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) 2677 { 2678 shader_arb_get_register_name(ins, &dst->reg, src_name0, &is_color); 2679 shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name0); 2680 } 2681 } else if(priv->target_version >= NV2) { 2682 shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color); 2683 2684 /* Sincos writemask must be .x, .y or .xy */ 2685 if(dst->write_mask & WINED3DSP_WRITEMASK_0) 2686 shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0); 2687 if(dst->write_mask & WINED3DSP_WRITEMASK_1) 2688 shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0); 2689 } else { 2690 /* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8 2691 * helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2. 2692 * 2693 * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... 2694 * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ... 
2695 * 2696 * The constants we get are: 2697 * 2698 * +1 +1, -1 -1 +1 +1 -1 -1 2699 * ---- , ---- , ---- , ----- , ----- , ----- , ------ 2700 * 1!*2 2!*4 3!*8 4!*16 5!*32 6!*64 7!*128 2701 * 2702 * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2): 2703 * 2704 * (x/2)^2 = x^2 / 4 2705 * (x/2)^3 = x^3 / 8 2706 * (x/2)^4 = x^4 / 16 2707 * (x/2)^5 = x^5 / 32 2708 * etc 2709 * 2710 * To get the final result: 2711 * sin(x) = 2 * sin(x/2) * cos(x/2) 2712 * cos(x) = cos(x/2)^2 - sin(x/2)^2 2713 * (from sin(x+y) and cos(x+y) rules) 2714 * 2715 * As per MSDN, dst.z is undefined after the operation, and so is 2716 * dst.x and dst.y if they're masked out by the writemask. Ie 2717 * sincos dst.y, src1, c0, c1 2718 * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler 2719 * vsa.exe also stops with an error if the dest register is the same register as the source 2720 * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also 2721 * indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0). 2722 */ 2723 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 2724 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2); 2725 shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color); 2726 2727 shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0); /* x ^ 2 */ 2728 shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0); /* x ^ 3 */ 2729 shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0); /* x ^ 4 */ 2730 shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0); /* x ^ 5 */ 2731 shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0); /* x ^ 6 */ 2732 shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0); /* x ^ 7 */ 2733 2734 /* sin(x/2) 2735 * 2736 * Unfortunately we don't get the constants in a DP4-capable form. 
Is there a way to 2737 * properly merge that with MULs in the code above? 2738 * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe 2739 * we can merge the sine and cosine MAD rows to calculate them together. 2740 */ 2741 shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */ 2742 shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */ 2743 shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */ 2744 shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */ 2745 2746 /* cos(x/2) */ 2747 shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */ 2748 shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */ 2749 shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */ 2750 2751 if(dst->write_mask & WINED3DSP_WRITEMASK_0) { 2752 /* cos x */ 2753 shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n"); 2754 shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name); 2755 } 2756 if(dst->write_mask & WINED3DSP_WRITEMASK_1) { 2757 /* sin x */ 2758 shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name); 2759 shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name); 2760 } 2761 } 2762 } 2763 2764 static void shader_hw_sgn(const struct wined3d_shader_instruction *ins) 2765 { 2766 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2767 char dst_name[50]; 2768 char src_name[50]; 2769 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 2770 2771 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2772 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name); 2773 2774 /* SGN is only valid in vertex shaders */ 2775 if(ctx->target_version >= NV2) { 2776 shader_addline(buffer, "SSG%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name); 2777 return; 
2778 } 2779 2780 /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false. 2781 * if SRC < 0.0, SRC < -SRC = TRUE. If neither is true, src = 0.0 2782 */ 2783 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) { 2784 shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name); 2785 } else { 2786 /* src contains TA? Write to the dest first. This won't overwrite our destination. 2787 * Then use TA, and calculate the final result 2788 * 2789 * Not reading from TA? Store the first result in TA to avoid overwriting the 2790 * destination if src reg = dst reg 2791 */ 2792 if(strstr(src_name, "TA")) 2793 { 2794 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 2795 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 2796 shader_addline(buffer, "ADD %s, %s, -TA;\n", dst_name, dst_name); 2797 } 2798 else 2799 { 2800 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 2801 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 2802 shader_addline(buffer, "ADD %s, TA, -%s;\n", dst_name, dst_name); 2803 } 2804 } 2805 } 2806 2807 static void shader_hw_dsy(const struct wined3d_shader_instruction *ins) 2808 { 2809 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2810 char src[50]; 2811 char dst[50]; 2812 char dst_name[50]; 2813 BOOL is_color; 2814 2815 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 2816 shader_arb_get_src_param(ins, &ins->src[0], 0, src); 2817 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2818 2819 shader_addline(buffer, "DDY %s, %s;\n", dst, src); 2820 shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst, dst_name); 2821 } 2822 2823 static void shader_hw_pow(const struct wined3d_shader_instruction *ins) 2824 { 2825 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2826 char src0[50], src1[50], dst[50]; 2827 struct wined3d_shader_src_param src0_copy = ins->src[0]; 2828 BOOL need_abs = FALSE; 2829 
struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2830 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2831 2832 /* POW operates on the absolute value of the input */ 2833 src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs); 2834 2835 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 2836 shader_arb_get_src_param(ins, &src0_copy, 0, src0); 2837 shader_arb_get_src_param(ins, &ins->src[1], 1, src1); 2838 2839 if (need_abs) 2840 shader_addline(buffer, "ABS TA.x, %s;\n", src0); 2841 else 2842 shader_addline(buffer, "MOV TA.x, %s;\n", src0); 2843 2844 if (priv->target_version >= NV2) 2845 { 2846 shader_addline(buffer, "MOVC TA.y, %s;\n", src1); 2847 shader_addline(buffer, "POW%s %s, TA.x, TA.y;\n", shader_arb_get_modifier(ins), dst); 2848 shader_addline(buffer, "MOV %s (EQ.y), %s;\n", dst, one); 2849 } 2850 else 2851 { 2852 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2853 const char *flt_eps = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_EPS); 2854 2855 shader_addline(buffer, "ABS TA.y, %s;\n", src1); 2856 shader_addline(buffer, "SGE TA.y, -TA.y, %s;\n", zero); 2857 /* Possibly add flt_eps to avoid getting float special values */ 2858 shader_addline(buffer, "MAD TA.z, TA.y, %s, %s;\n", flt_eps, src1); 2859 shader_addline(buffer, "POW%s TA.x, TA.x, TA.z;\n", shader_arb_get_modifier(ins)); 2860 shader_addline(buffer, "MAD TA.x, -TA.x, TA.y, TA.x;\n"); 2861 shader_addline(buffer, "MAD %s, TA.y, %s, TA.x;\n", dst, one); 2862 } 2863 } 2864 2865 static void shader_hw_loop(const struct wined3d_shader_instruction *ins) 2866 { 2867 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2868 char src_name[50]; 2869 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2870 2871 /* src0 is aL */ 2872 shader_arb_get_src_param(ins, &ins->src[1], 0, src_name); 2873 2874 if(vshader) 2875 { 2876 struct shader_arb_ctx_priv 
*priv = ins->ctx->backend_data; 2877 struct list *e = list_head(&priv->control_frames); 2878 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2879 2880 if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 2881 /* The constant loader makes sure to load -1 into iX.w */ 2882 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 2883 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 2884 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 2885 } 2886 else 2887 { 2888 shader_addline(buffer, "LOOP %s;\n", src_name); 2889 } 2890 } 2891 2892 static void shader_hw_rep(const struct wined3d_shader_instruction *ins) 2893 { 2894 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2895 char src_name[50]; 2896 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2897 2898 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name); 2899 2900 /* The constant loader makes sure to load -1 into iX.w */ 2901 if(vshader) 2902 { 2903 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2904 struct list *e = list_head(&priv->control_frames); 2905 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2906 2907 if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 2908 2909 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 2910 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 2911 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 2912 } 2913 else 2914 { 2915 shader_addline(buffer, "REP %s;\n", src_name); 2916 } 2917 } 2918 2919 static void shader_hw_endloop(const struct wined3d_shader_instruction *ins) 2920 { 2921 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2922 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2923 2924 if(vshader) 2925 { 2926 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2927 struct list *e = 
list_head(&priv->control_frames); 2928 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2929 2930 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 2931 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 2932 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 2933 2934 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 2935 } 2936 else 2937 { 2938 shader_addline(buffer, "ENDLOOP;\n"); 2939 } 2940 } 2941 2942 static void shader_hw_endrep(const struct wined3d_shader_instruction *ins) 2943 { 2944 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2945 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2946 2947 if(vshader) 2948 { 2949 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2950 struct list *e = list_head(&priv->control_frames); 2951 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2952 2953 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 2954 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 2955 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 2956 2957 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 2958 } 2959 else 2960 { 2961 shader_addline(buffer, "ENDREP;\n"); 2962 } 2963 } 2964 2965 static const struct control_frame *find_last_loop(const struct shader_arb_ctx_priv *priv) 2966 { 2967 struct control_frame *control_frame; 2968 2969 LIST_FOR_EACH_ENTRY(control_frame, &priv->control_frames, struct control_frame, entry) 2970 { 2971 if(control_frame->type == LOOP || control_frame->type == REP) return control_frame; 2972 } 2973 ERR("Could not find loop for break\n"); 2974 return NULL; 2975 } 2976 2977 static void shader_hw_break(const struct wined3d_shader_instruction *ins) 2978 { 2979 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 2980 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 2981 BOOL 
vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2982 2983 if(vshader) 2984 { 2985 shader_addline(buffer, "BRA loop_%u_end;\n", control_frame->no.loop); 2986 } 2987 else 2988 { 2989 shader_addline(buffer, "BRK;\n"); 2990 } 2991 } 2992 2993 static const char *get_compare(enum wined3d_shader_rel_op op) 2994 { 2995 switch (op) 2996 { 2997 case WINED3D_SHADER_REL_OP_GT: return "GT"; 2998 case WINED3D_SHADER_REL_OP_EQ: return "EQ"; 2999 case WINED3D_SHADER_REL_OP_GE: return "GE"; 3000 case WINED3D_SHADER_REL_OP_LT: return "LT"; 3001 case WINED3D_SHADER_REL_OP_NE: return "NE"; 3002 case WINED3D_SHADER_REL_OP_LE: return "LE"; 3003 default: 3004 FIXME("Unrecognized operator %#x.\n", op); 3005 return "(\?\?)"; 3006 } 3007 } 3008 3009 static enum wined3d_shader_rel_op invert_compare(enum wined3d_shader_rel_op op) 3010 { 3011 switch (op) 3012 { 3013 case WINED3D_SHADER_REL_OP_GT: return WINED3D_SHADER_REL_OP_LE; 3014 case WINED3D_SHADER_REL_OP_EQ: return WINED3D_SHADER_REL_OP_NE; 3015 case WINED3D_SHADER_REL_OP_GE: return WINED3D_SHADER_REL_OP_LT; 3016 case WINED3D_SHADER_REL_OP_LT: return WINED3D_SHADER_REL_OP_GE; 3017 case WINED3D_SHADER_REL_OP_NE: return WINED3D_SHADER_REL_OP_EQ; 3018 case WINED3D_SHADER_REL_OP_LE: return WINED3D_SHADER_REL_OP_GT; 3019 default: 3020 FIXME("Unrecognized operator %#x.\n", op); 3021 return -1; 3022 } 3023 } 3024 3025 static void shader_hw_breakc(const struct wined3d_shader_instruction *ins) 3026 { 3027 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3028 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3029 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 3030 char src_name0[50]; 3031 char src_name1[50]; 3032 const char *comp = get_compare(ins->flags); 3033 3034 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 3035 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 3036 3037 if(vshader) 3038 { 3039 /* SUBC CC, src0, 
src1" works only in pixel shaders, so use TA to throw 3040 * away the subtraction result 3041 */ 3042 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3043 shader_addline(buffer, "BRA loop_%u_end (%s.x);\n", control_frame->no.loop, comp); 3044 } 3045 else 3046 { 3047 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3048 shader_addline(buffer, "BRK (%s.x);\n", comp); 3049 } 3050 } 3051 3052 static void shader_hw_ifc(const struct wined3d_shader_instruction *ins) 3053 { 3054 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3055 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3056 struct list *e = list_head(&priv->control_frames); 3057 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3058 const char *comp; 3059 char src_name0[50]; 3060 char src_name1[50]; 3061 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3062 3063 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 3064 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 3065 3066 if(vshader) 3067 { 3068 /* Invert the flag. 
We jump to the else label if the condition is NOT true */ 3069 comp = get_compare(invert_compare(ins->flags)); 3070 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3071 shader_addline(buffer, "BRA ifc_%u_else (%s.x);\n", control_frame->no.ifc, comp); 3072 } 3073 else 3074 { 3075 comp = get_compare(ins->flags); 3076 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3077 shader_addline(buffer, "IF %s.x;\n", comp); 3078 } 3079 } 3080 3081 static void shader_hw_else(const struct wined3d_shader_instruction *ins) 3082 { 3083 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3084 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3085 struct list *e = list_head(&priv->control_frames); 3086 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3087 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3088 3089 if(vshader) 3090 { 3091 shader_addline(buffer, "BRA ifc_%u_endif;\n", control_frame->no.ifc); 3092 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 3093 control_frame->had_else = TRUE; 3094 } 3095 else 3096 { 3097 shader_addline(buffer, "ELSE;\n"); 3098 } 3099 } 3100 3101 static void shader_hw_endif(const struct wined3d_shader_instruction *ins) 3102 { 3103 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3104 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3105 struct list *e = list_head(&priv->control_frames); 3106 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3107 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3108 3109 if(vshader) 3110 { 3111 if(control_frame->had_else) 3112 { 3113 shader_addline(buffer, "ifc_%u_endif:\n", control_frame->no.ifc); 3114 } 3115 else 3116 { 3117 shader_addline(buffer, "#No else branch. 
else is endif\n"); 3118 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 3119 } 3120 } 3121 else 3122 { 3123 shader_addline(buffer, "ENDIF;\n"); 3124 } 3125 } 3126 3127 static void shader_hw_texldd(const struct wined3d_shader_instruction *ins) 3128 { 3129 DWORD sampler_idx = ins->src[1].reg.idx[0].offset; 3130 char reg_dest[40]; 3131 char reg_src[3][40]; 3132 WORD flags = TEX_DERIV; 3133 3134 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 3135 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src[0]); 3136 shader_arb_get_src_param(ins, &ins->src[2], 1, reg_src[1]); 3137 shader_arb_get_src_param(ins, &ins->src[3], 2, reg_src[2]); 3138 3139 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 3140 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 3141 3142 shader_hw_sample(ins, sampler_idx, reg_dest, reg_src[0], flags, reg_src[1], reg_src[2]); 3143 } 3144 3145 static void shader_hw_texldl(const struct wined3d_shader_instruction *ins) 3146 { 3147 DWORD sampler_idx = ins->src[1].reg.idx[0].offset; 3148 char reg_dest[40]; 3149 char reg_coord[40]; 3150 WORD flags = TEX_LOD; 3151 3152 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 3153 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_coord); 3154 3155 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 3156 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 3157 3158 shader_hw_sample(ins, sampler_idx, reg_dest, reg_coord, flags, NULL, NULL); 3159 } 3160 3161 static void shader_hw_label(const struct wined3d_shader_instruction *ins) 3162 { 3163 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3164 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3165 3166 priv->in_main_func = FALSE; 3167 /* Call instructions activate the NV extensions, not labels and rets. 
If there is an uncalled 3168 * subroutine, don't generate a label that will make GL complain 3169 */ 3170 if(priv->target_version == ARB) return; 3171 3172 shader_addline(buffer, "l%u:\n", ins->src[0].reg.idx[0].offset); 3173 } 3174 3175 static void vshader_add_footer(struct shader_arb_ctx_priv *priv_ctx, 3176 const struct arb_vshader_private *shader_data, const struct arb_vs_compile_args *args, 3177 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info, 3178 struct wined3d_string_buffer *buffer) 3179 { 3180 unsigned int i; 3181 3182 /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used 3183 * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE), 3184 * the fog frag coord is thrown away. If the fog frag coord is used, but not written by 3185 * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0) 3186 */ 3187 if (args->super.fog_src == VS_FOG_Z) 3188 { 3189 shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n"); 3190 } 3191 else 3192 { 3193 if (!reg_maps->fog) 3194 { 3195 /* posFixup.x is always 1.0, so we can safely use it */ 3196 shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n"); 3197 } 3198 else 3199 { 3200 /* Clamp fogcoord */ 3201 const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO); 3202 const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE); 3203 3204 shader_addline(buffer, "MIN TMP_FOGCOORD.x, TMP_FOGCOORD.x, %s;\n", one); 3205 shader_addline(buffer, "MAX result.fogcoord.x, TMP_FOGCOORD.x, %s;\n", zero); 3206 } 3207 } 3208 3209 /* Clipplanes are always stored without y inversion */ 3210 if (use_nv_clip(gl_info) && priv_ctx->target_version >= NV2) 3211 { 3212 if (args->super.clip_enabled) 3213 { 3214 for (i = 0; i < priv_ctx->vs_clipplanes; i++) 3215 { 3216 shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i); 3217 } 3218 } 
3219 } 3220 else if (args->clip.boolclip.clip_texcoord) 3221 { 3222 static const char component[4] = {'x', 'y', 'z', 'w'}; 3223 unsigned int cur_clip = 0; 3224 const char *zero = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ZERO); 3225 3226 for (i = 0; i < gl_info->limits.user_clip_distances; ++i) 3227 { 3228 if (args->clip.boolclip.clipplane_mask & (1u << i)) 3229 { 3230 shader_addline(buffer, "DP4 TA.%c, TMP_OUT, state.clip[%u].plane;\n", 3231 component[cur_clip++], i); 3232 } 3233 } 3234 switch (cur_clip) 3235 { 3236 case 0: 3237 shader_addline(buffer, "MOV TA, %s;\n", zero); 3238 break; 3239 case 1: 3240 shader_addline(buffer, "MOV TA.yzw, %s;\n", zero); 3241 break; 3242 case 2: 3243 shader_addline(buffer, "MOV TA.zw, %s;\n", zero); 3244 break; 3245 case 3: 3246 shader_addline(buffer, "MOV TA.w, %s;\n", zero); 3247 break; 3248 } 3249 shader_addline(buffer, "MOV result.texcoord[%u], TA;\n", 3250 args->clip.boolclip.clip_texcoord - 1); 3251 } 3252 3253 /* Write the final position. 3254 * 3255 * OpenGL coordinates specify the center of the pixel while d3d coords specify 3256 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains 3257 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x 3258 * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that. 3259 */ 3260 if (!gl_info->supported[ARB_CLIP_CONTROL]) 3261 { 3262 shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n"); 3263 shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n"); 3264 shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n"); 3265 3266 /* Z coord [0;1]->[-1;1] mapping, see comment in 3267 * get_projection_matrix() in utils.c. 
*/ 3268 if (need_helper_const(shader_data, reg_maps, gl_info)) 3269 { 3270 const char *two = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_TWO); 3271 shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, %s, -TMP_OUT.w;\n", two); 3272 } 3273 else 3274 { 3275 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n"); 3276 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n"); 3277 } 3278 } 3279 3280 shader_addline(buffer, "MOV result.position, TMP_OUT;\n"); 3281 3282 priv_ctx->footer_written = TRUE; 3283 } 3284 3285 static void shader_hw_ret(const struct wined3d_shader_instruction *ins) 3286 { 3287 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3288 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3289 const struct wined3d_shader *shader = ins->ctx->shader; 3290 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3291 3292 if(priv->target_version == ARB) return; 3293 3294 if(vshader) 3295 { 3296 if (priv->in_main_func) vshader_add_footer(priv, shader->backend_data, 3297 priv->cur_vs_args, ins->ctx->reg_maps, ins->ctx->gl_info, buffer); 3298 } 3299 3300 shader_addline(buffer, "RET;\n"); 3301 } 3302 3303 static void shader_hw_call(const struct wined3d_shader_instruction *ins) 3304 { 3305 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 3306 shader_addline(buffer, "CAL l%u;\n", ins->src[0].reg.idx[0].offset); 3307 } 3308 3309 static BOOL shader_arb_compile(const struct wined3d_gl_info *gl_info, GLenum target, const char *src) 3310 { 3311 const char *ptr, *line; 3312 GLint native, pos; 3313 3314 if (TRACE_ON(d3d_shader)) 3315 { 3316 ptr = src; 3317 while ((line = get_line(&ptr))) TRACE_(d3d_shader)(" %.*s", (int)(ptr - line), line); 3318 } 3319 3320 GL_EXTCALL(glProgramStringARB(target, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(src), src)); 3321 checkGLcall("glProgramStringARB()"); 3322 3323 if (FIXME_ON(d3d_shader)) 3324 { 3325 gl_info->gl_ops.gl.p_glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, 
&pos); 3326 if (pos != -1) 3327 { 3328 FIXME_(d3d_shader)("Program error at position %d: %s\n\n", pos, 3329 debugstr_a((const char *)gl_info->gl_ops.gl.p_glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 3330 ptr = src; 3331 while ((line = get_line(&ptr))) FIXME_(d3d_shader)(" %.*s", (int)(ptr - line), line); 3332 FIXME_(d3d_shader)("\n"); 3333 3334 return FALSE; 3335 } 3336 } 3337 3338 if (WARN_ON(d3d_perf)) 3339 { 3340 GL_EXTCALL(glGetProgramivARB(target, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 3341 checkGLcall("glGetProgramivARB()"); 3342 if (!native) 3343 WARN_(d3d_perf)("Program exceeds native resource limits.\n"); 3344 } 3345 3346 return TRUE; 3347 } 3348 3349 static void arbfp_add_sRGB_correction(struct wined3d_string_buffer *buffer, const char *fragcolor, 3350 const char *tmp1, const char *tmp2, const char *tmp3, const char *tmp4, BOOL condcode) 3351 { 3352 /* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */ 3353 3354 if(condcode) 3355 { 3356 /* Sigh. MOVC CC doesn't work, so use one of the temps as dummy dest */ 3357 shader_addline(buffer, "SUBC %s, %s.x, srgb_consts1.x;\n", tmp1, fragcolor); 3358 /* Calculate the > 0.0031308 case */ 3359 shader_addline(buffer, "POW %s.x (GE), %s.x, srgb_consts0.x;\n", fragcolor, fragcolor); 3360 shader_addline(buffer, "POW %s.y (GE), %s.y, srgb_consts0.x;\n", fragcolor, fragcolor); 3361 shader_addline(buffer, "POW %s.z (GE), %s.z, srgb_consts0.x;\n", fragcolor, fragcolor); 3362 shader_addline(buffer, "MUL %s.xyz (GE), %s, srgb_consts0.y;\n", fragcolor, fragcolor); 3363 shader_addline(buffer, "SUB %s.xyz (GE), %s, srgb_consts0.z;\n", fragcolor, fragcolor); 3364 /* Calculate the < case */ 3365 shader_addline(buffer, "MUL %s.xyz (LT), srgb_consts0.w, %s;\n", fragcolor, fragcolor); 3366 } 3367 else 3368 { 3369 /* Calculate the > 0.0031308 case */ 3370 shader_addline(buffer, "POW %s.x, %s.x, srgb_consts0.x;\n", tmp1, fragcolor); 3371 shader_addline(buffer, "POW %s.y, %s.y, srgb_consts0.x;\n", tmp1, fragcolor); 
3372 shader_addline(buffer, "POW %s.z, %s.z, srgb_consts0.x;\n", tmp1, fragcolor); 3373 shader_addline(buffer, "MUL %s, %s, srgb_consts0.y;\n", tmp1, tmp1); 3374 shader_addline(buffer, "SUB %s, %s, srgb_consts0.z;\n", tmp1, tmp1); 3375 /* Calculate the < case */ 3376 shader_addline(buffer, "MUL %s, srgb_consts0.w, %s;\n", tmp2, fragcolor); 3377 /* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */ 3378 shader_addline(buffer, "SLT %s, srgb_consts1.x, %s;\n", tmp3, fragcolor); 3379 shader_addline(buffer, "SGE %s, srgb_consts1.x, %s;\n", tmp4, fragcolor); 3380 /* Store the components > 0.0031308 in the destination */ 3381 shader_addline(buffer, "MUL %s.xyz, %s, %s;\n", fragcolor, tmp1, tmp3); 3382 /* Add the components that are < 0.0031308 */ 3383 shader_addline(buffer, "MAD %s.xyz, %s, %s, %s;\n", fragcolor, tmp2, tmp4, fragcolor); 3384 /* Move everything into result.color at once. Nvidia hardware cannot handle partial 3385 * result.color writes(.rgb first, then .a), or handle overwriting already written 3386 * components. The assembler uses a temporary register in this case, which is usually 3387 * not allocated from one of our registers that were used earlier. 3388 */ 3389 } 3390 /* [0.0;1.0] clamping. 
Not needed, this is done implicitly */ 3391 } 3392 3393 static const DWORD *find_loop_control_values(const struct wined3d_shader *shader, DWORD idx) 3394 { 3395 const struct wined3d_shader_lconst *constant; 3396 3397 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 3398 { 3399 if (constant->idx == idx) 3400 { 3401 return constant->value; 3402 } 3403 } 3404 return NULL; 3405 } 3406 3407 static void init_ps_input(const struct wined3d_shader *shader, 3408 const struct arb_ps_compile_args *args, struct shader_arb_ctx_priv *priv) 3409 { 3410 static const char * const texcoords[8] = 3411 { 3412 "fragment.texcoord[0]", "fragment.texcoord[1]", "fragment.texcoord[2]", "fragment.texcoord[3]", 3413 "fragment.texcoord[4]", "fragment.texcoord[5]", "fragment.texcoord[6]", "fragment.texcoord[7]" 3414 }; 3415 unsigned int i; 3416 const struct wined3d_shader_signature_element *input; 3417 const char *semantic_name; 3418 DWORD semantic_idx; 3419 3420 switch(args->super.vp_mode) 3421 { 3422 case pretransformed: 3423 case fixedfunction: 3424 /* The pixelshader has to collect the varyings on its own. In any case properly load 3425 * color0 and color1. In the case of pretransformed vertices also load texcoords. Set 3426 * other attribs to 0.0. 3427 * 3428 * For fixedfunction this behavior is correct, according to the tests. 
For pretransformed 3429 * we'd either need a replacement shader that can load other attribs like BINORMAL, or 3430 * load the texcoord attrib pointers to match the pixel shader signature 3431 */ 3432 for (i = 0; i < shader->input_signature.element_count; ++i) 3433 { 3434 input = &shader->input_signature.elements[i]; 3435 if (!(semantic_name = input->semantic_name)) 3436 continue; 3437 semantic_idx = input->semantic_idx; 3438 3439 if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR)) 3440 { 3441 if (!semantic_idx) 3442 priv->ps_input[input->register_idx] = "fragment.color.primary"; 3443 else if (semantic_idx == 1) 3444 priv->ps_input[input->register_idx] = "fragment.color.secondary"; 3445 else 3446 priv->ps_input[input->register_idx] = "0.0"; 3447 } 3448 else if (args->super.vp_mode == fixedfunction) 3449 { 3450 priv->ps_input[input->register_idx] = "0.0"; 3451 } 3452 else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD)) 3453 { 3454 if (semantic_idx < 8) 3455 priv->ps_input[input->register_idx] = texcoords[semantic_idx]; 3456 else 3457 priv->ps_input[input->register_idx] = "0.0"; 3458 } 3459 else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_FOG)) 3460 { 3461 if (!semantic_idx) 3462 priv->ps_input[input->register_idx] = "fragment.fogcoord"; 3463 else 3464 priv->ps_input[input->register_idx] = "0.0"; 3465 } 3466 else 3467 { 3468 priv->ps_input[input->register_idx] = "0.0"; 3469 } 3470 3471 TRACE("v%u, semantic %s%u is %s\n", input->register_idx, 3472 semantic_name, semantic_idx, priv->ps_input[input->register_idx]); 3473 } 3474 break; 3475 3476 case vertexshader: 3477 /* That one is easy. 
The vertex shaders provide v0-v7 in fragment.texcoord and v8 and v9 in 3478 * fragment.color 3479 */ 3480 for(i = 0; i < 8; i++) 3481 { 3482 priv->ps_input[i] = texcoords[i]; 3483 } 3484 priv->ps_input[8] = "fragment.color.primary"; 3485 priv->ps_input[9] = "fragment.color.secondary"; 3486 break; 3487 } 3488 } 3489 3490 static void arbfp_add_linear_fog(struct wined3d_string_buffer *buffer, 3491 const char *fragcolor, const char *tmp) 3492 { 3493 shader_addline(buffer, "SUB %s.x, state.fog.params.z, fragment.fogcoord.x;\n", tmp); 3494 shader_addline(buffer, "MUL_SAT %s.x, %s.x, state.fog.params.w;\n", tmp, tmp); 3495 shader_addline(buffer, "LRP %s.rgb, %s.x, %s, state.fog.color;\n", fragcolor, tmp, fragcolor); 3496 } 3497 3498 /* Context activation is done by the caller. */ 3499 static GLuint shader_arb_generate_pshader(const struct wined3d_shader *shader, 3500 const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer, 3501 const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled) 3502 { 3503 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 3504 GLuint retval; 3505 char fragcolor[16]; 3506 DWORD next_local = 0; 3507 struct shader_arb_ctx_priv priv_ctx; 3508 BOOL dcl_td = FALSE; 3509 BOOL want_nv_prog = FALSE; 3510 struct arb_pshader_private *shader_priv = shader->backend_data; 3511 DWORD map; 3512 BOOL custom_linear_fog = FALSE; 3513 3514 char srgbtmp[4][4]; 3515 char ftoa_tmp[17]; 3516 unsigned int i, found = 0; 3517 3518 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 3519 { 3520 if (!(map & 1) 3521 || (shader->u.ps.color0_mov && i == shader->u.ps.color0_reg) 3522 || (reg_maps->shader_version.major < 2 && !i)) 3523 continue; 3524 3525 sprintf(srgbtmp[found], "R%u", i); 3526 ++found; 3527 if (found == 4) break; 3528 } 3529 3530 switch(found) { 3531 case 0: 3532 sprintf(srgbtmp[0], "TA"); 3533 sprintf(srgbtmp[1], "TB"); 3534 sprintf(srgbtmp[2], "TC"); 3535 sprintf(srgbtmp[3], "TD"); 3536 dcl_td 
= TRUE; 3537 break; 3538 case 1: 3539 sprintf(srgbtmp[1], "TA"); 3540 sprintf(srgbtmp[2], "TB"); 3541 sprintf(srgbtmp[3], "TC"); 3542 break; 3543 case 2: 3544 sprintf(srgbtmp[2], "TA"); 3545 sprintf(srgbtmp[3], "TB"); 3546 break; 3547 case 3: 3548 sprintf(srgbtmp[3], "TA"); 3549 break; 3550 case 4: 3551 break; 3552 } 3553 3554 /* Create the hw ARB shader */ 3555 memset(&priv_ctx, 0, sizeof(priv_ctx)); 3556 priv_ctx.cur_ps_args = args; 3557 priv_ctx.compiled_fprog = compiled; 3558 priv_ctx.cur_np2fixup_info = &compiled->np2fixup_info; 3559 init_ps_input(shader, args, &priv_ctx); 3560 list_init(&priv_ctx.control_frames); 3561 priv_ctx.ps_post_process = args->super.srgb_correction; 3562 3563 /* Avoid enabling NV_fragment_program* if we do not need it. 3564 * 3565 * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register, 3566 * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation 3567 * is faster than what we gain from using higher native instructions. There are some things though 3568 * that cannot be emulated. In that case enable the extensions. 3569 * If the extension is enabled, instruction handlers that support both ways will use it. 3570 * 3571 * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program. 3572 * So enable the best we can get. 
3573 */ 3574 if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0 || reg_maps->usestexldd || 3575 reg_maps->usestexldl || reg_maps->usesfacing || reg_maps->usesifc || reg_maps->usescall) 3576 { 3577 want_nv_prog = TRUE; 3578 } 3579 3580 shader_addline(buffer, "!!ARBfp1.0\n"); 3581 if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM2]) 3582 { 3583 shader_addline(buffer, "OPTION NV_fragment_program2;\n"); 3584 priv_ctx.target_version = NV3; 3585 } 3586 else if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]) 3587 { 3588 shader_addline(buffer, "OPTION NV_fragment_program;\n"); 3589 priv_ctx.target_version = NV2; 3590 } else { 3591 if(want_nv_prog) 3592 { 3593 /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some 3594 * limits properly 3595 */ 3596 ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n"); 3597 ERR("Try GLSL\n"); 3598 } 3599 priv_ctx.target_version = ARB; 3600 } 3601 3602 if (reg_maps->rt_mask > 1) 3603 { 3604 shader_addline(buffer, "OPTION ARB_draw_buffers;\n"); 3605 } 3606 3607 if (reg_maps->shader_version.major < 3) 3608 { 3609 switch (args->super.fog) 3610 { 3611 case WINED3D_FFP_PS_FOG_OFF: 3612 break; 3613 case WINED3D_FFP_PS_FOG_LINEAR: 3614 if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG) 3615 { 3616 custom_linear_fog = TRUE; 3617 priv_ctx.ps_post_process = TRUE; 3618 break; 3619 } 3620 shader_addline(buffer, "OPTION ARB_fog_linear;\n"); 3621 break; 3622 case WINED3D_FFP_PS_FOG_EXP: 3623 shader_addline(buffer, "OPTION ARB_fog_exp;\n"); 3624 break; 3625 case WINED3D_FFP_PS_FOG_EXP2: 3626 shader_addline(buffer, "OPTION ARB_fog_exp2;\n"); 3627 break; 3628 } 3629 } 3630 3631 /* For now always declare the temps. At least the Nvidia assembler optimizes completely 3632 * unused temps away(but occupies them for the whole shader if they're used once). 
Always 3633 * declaring them avoids tricky bookkeeping work 3634 */ 3635 shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */ 3636 shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */ 3637 shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */ 3638 if(dcl_td) shader_addline(buffer, "TEMP TD;\n"); /* Used for sRGB writing */ 3639 shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n"); 3640 shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n"); 3641 wined3d_ftoa(eps, ftoa_tmp); 3642 shader_addline(buffer, "PARAM ps_helper_const = { 0.0, 1.0, %s, 0.0 };\n", ftoa_tmp); 3643 3644 if (reg_maps->shader_version.major < 2) 3645 { 3646 strcpy(fragcolor, "R0"); 3647 } 3648 else 3649 { 3650 if (priv_ctx.ps_post_process) 3651 { 3652 if (shader->u.ps.color0_mov) 3653 { 3654 sprintf(fragcolor, "R%u", shader->u.ps.color0_reg); 3655 } 3656 else 3657 { 3658 shader_addline(buffer, "TEMP TMP_COLOR;\n"); 3659 strcpy(fragcolor, "TMP_COLOR"); 3660 } 3661 } else { 3662 strcpy(fragcolor, "result.color"); 3663 } 3664 } 3665 3666 if (args->super.srgb_correction) 3667 { 3668 shader_addline(buffer, "PARAM srgb_consts0 = "); 3669 shader_arb_append_imm_vec4(buffer, wined3d_srgb_const0); 3670 shader_addline(buffer, ";\n"); 3671 shader_addline(buffer, "PARAM srgb_consts1 = "); 3672 shader_arb_append_imm_vec4(buffer, wined3d_srgb_const1); 3673 shader_addline(buffer, ";\n"); 3674 } 3675 3676 /* Base Declarations */ 3677 shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info, NULL, &priv_ctx); 3678 3679 for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i) 3680 { 3681 unsigned char bump_const; 3682 3683 if (!(map & 1)) continue; 3684 3685 bump_const = compiled->numbumpenvmatconsts; 3686 compiled->bumpenvmatconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED; 3687 compiled->bumpenvmatconst[bump_const].texunit = i; 3688 compiled->luminanceconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED; 3689 
compiled->luminanceconst[bump_const].texunit = i; 3690 3691 /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported 3692 * in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading 3693 * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped 3694 * textures due to conditional NP2 restrictions) 3695 * 3696 * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of 3697 * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants, 3698 * their location is shader dependent anyway and they cannot be loaded globally. 3699 */ 3700 compiled->bumpenvmatconst[bump_const].const_num = next_local++; 3701 shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n", 3702 i, compiled->bumpenvmatconst[bump_const].const_num); 3703 compiled->numbumpenvmatconsts = bump_const + 1; 3704 3705 if (!(reg_maps->luminanceparams & (1u << i))) 3706 continue; 3707 3708 compiled->luminanceconst[bump_const].const_num = next_local++; 3709 shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n", 3710 i, compiled->luminanceconst[bump_const].const_num); 3711 } 3712 3713 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 3714 { 3715 compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED; 3716 if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2) 3717 { 3718 const DWORD *control_values = find_loop_control_values(shader, i); 3719 3720 if(control_values) 3721 { 3722 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i, 3723 control_values[0], control_values[1], control_values[2]); 3724 } 3725 else 3726 { 3727 compiled->int_consts[i] = next_local; 3728 compiled->num_int_consts++; 3729 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++); 3730 } 3731 } 3732 
} 3733 3734 if(reg_maps->vpos || reg_maps->usesdsy) 3735 { 3736 compiled->ycorrection = next_local; 3737 shader_addline(buffer, "PARAM ycorrection = program.local[%u];\n", next_local++); 3738 3739 if(reg_maps->vpos) 3740 { 3741 shader_addline(buffer, "TEMP vpos;\n"); 3742 /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen). 3743 * ycorrection.y: -1.0(onscreen), 1.0(offscreen) 3744 * ycorrection.z: 1.0 3745 * ycorrection.w: 0.0 3746 */ 3747 shader_addline(buffer, "MAD vpos, fragment.position, ycorrection.zyww, ycorrection.wxww;\n"); 3748 shader_addline(buffer, "FLR vpos.xy, vpos;\n"); 3749 } 3750 } 3751 else 3752 { 3753 compiled->ycorrection = WINED3D_CONST_NUM_UNUSED; 3754 } 3755 3756 /* Load constants to fixup NP2 texcoords if there are still free constants left: 3757 * Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume 3758 * at most 8 (MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to 3759 * use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not 3760 * applied / activated. This will probably result in wrong rendering of the texture, but will save us from 3761 * shader compilation errors and the subsequent errors when drawing with this shader. 
*/ 3762 if (priv_ctx.cur_ps_args->super.np2_fixup) { 3763 unsigned char cur_fixup_sampler = 0; 3764 3765 struct arb_ps_np2fixup_info* const fixup = priv_ctx.cur_np2fixup_info; 3766 const WORD map = priv_ctx.cur_ps_args->super.np2_fixup; 3767 const UINT max_lconsts = gl_info->limits.arb_ps_local_constants; 3768 3769 fixup->offset = next_local; 3770 fixup->super.active = 0; 3771 3772 for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) 3773 { 3774 if (!(map & (1u << i))) 3775 continue; 3776 3777 if (fixup->offset + (cur_fixup_sampler >> 1) < max_lconsts) 3778 { 3779 fixup->super.active |= (1u << i); 3780 fixup->super.idx[i] = cur_fixup_sampler++; 3781 } 3782 else 3783 { 3784 FIXME("No free constant found to load NP2 fixup data into shader. " 3785 "Sampling from this texture will probably look wrong.\n"); 3786 break; 3787 } 3788 } 3789 3790 fixup->super.num_consts = (cur_fixup_sampler + 1) >> 1; 3791 if (fixup->super.num_consts) { 3792 shader_addline(buffer, "PARAM np2fixup[%u] = { program.env[%u..%u] };\n", 3793 fixup->super.num_consts, fixup->offset, fixup->super.num_consts + fixup->offset - 1); 3794 } 3795 } 3796 3797 if (shader_priv->clipplane_emulation != ~0U && args->clip) 3798 { 3799 shader_addline(buffer, "KIL fragment.texcoord[%u];\n", shader_priv->clipplane_emulation); 3800 } 3801 3802 /* Base Shader Body */ 3803 if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL))) 3804 return 0; 3805 3806 if(args->super.srgb_correction) { 3807 arbfp_add_sRGB_correction(buffer, fragcolor, srgbtmp[0], srgbtmp[1], srgbtmp[2], srgbtmp[3], 3808 priv_ctx.target_version >= NV2); 3809 } 3810 3811 if (custom_linear_fog) 3812 arbfp_add_linear_fog(buffer, fragcolor, "TA"); 3813 3814 if(strcmp(fragcolor, "result.color")) { 3815 shader_addline(buffer, "MOV result.color, %s;\n", fragcolor); 3816 } 3817 shader_addline(buffer, "END\n"); 3818 3819 /* TODO: change to resource.glObjectHandle or something like that */ 3820 GL_EXTCALL(glGenProgramsARB(1, &retval)); 3821 
3822 TRACE("Creating a hw pixel shader, prg=%d\n", retval); 3823 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval)); 3824 3825 TRACE("Created hw pixel shader, prg=%d\n", retval); 3826 if (!shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer->buffer)) 3827 return 0; 3828 3829 return retval; 3830 } 3831 3832 static int compare_sig(const struct wined3d_shader_signature *sig1, const struct wined3d_shader_signature *sig2) 3833 { 3834 unsigned int i; 3835 int ret; 3836 3837 if (sig1->element_count != sig2->element_count) 3838 return sig1->element_count < sig2->element_count ? -1 : 1; 3839 3840 for (i = 0; i < sig1->element_count; ++i) 3841 { 3842 const struct wined3d_shader_signature_element *e1, *e2; 3843 3844 e1 = &sig1->elements[i]; 3845 e2 = &sig2->elements[i]; 3846 3847 if (!e1->semantic_name || !e2->semantic_name) 3848 { 3849 /* Compare pointers, not contents. One string is NULL (element 3850 * does not exist), the other one is not NULL. */ 3851 if (e1->semantic_name != e2->semantic_name) 3852 return e1->semantic_name < e2->semantic_name ? -1 : 1; 3853 continue; 3854 } 3855 3856 if ((ret = strcmp(e1->semantic_name, e2->semantic_name))) 3857 return ret; 3858 if (e1->semantic_idx != e2->semantic_idx) 3859 return e1->semantic_idx < e2->semantic_idx ? -1 : 1; 3860 if (e1->sysval_semantic != e2->sysval_semantic) 3861 return e1->sysval_semantic < e2->sysval_semantic ? -1 : 1; 3862 if (e1->component_type != e2->component_type) 3863 return e1->component_type < e2->component_type ? -1 : 1; 3864 if (e1->register_idx != e2->register_idx) 3865 return e1->register_idx < e2->register_idx ? -1 : 1; 3866 if (e1->mask != e2->mask) 3867 return e1->mask < e2->mask ? 
-1 : 1; 3868 } 3869 return 0; 3870 } 3871 3872 static void clone_sig(struct wined3d_shader_signature *new, const struct wined3d_shader_signature *sig) 3873 { 3874 unsigned int i; 3875 char *name; 3876 3877 new->element_count = sig->element_count; 3878 new->elements = wined3d_calloc(new->element_count, sizeof(*new->elements)); 3879 for (i = 0; i < sig->element_count; ++i) 3880 { 3881 new->elements[i] = sig->elements[i]; 3882 3883 if (!new->elements[i].semantic_name) 3884 continue; 3885 3886 /* Clone the semantic string */ 3887 name = HeapAlloc(GetProcessHeap(), 0, strlen(sig->elements[i].semantic_name) + 1); 3888 strcpy(name, sig->elements[i].semantic_name); 3889 new->elements[i].semantic_name = name; 3890 } 3891 } 3892 3893 static DWORD find_input_signature(struct shader_arb_priv *priv, const struct wined3d_shader_signature *sig) 3894 { 3895 struct wine_rb_entry *entry = wine_rb_get(&priv->signature_tree, sig); 3896 struct ps_signature *found_sig; 3897 3898 if (entry) 3899 { 3900 found_sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 3901 TRACE("Found existing signature %u\n", found_sig->idx); 3902 return found_sig->idx; 3903 } 3904 found_sig = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*found_sig)); 3905 clone_sig(&found_sig->sig, sig); 3906 found_sig->idx = priv->ps_sig_number++; 3907 TRACE("New signature stored and assigned number %u\n", found_sig->idx); 3908 if(wine_rb_put(&priv->signature_tree, sig, &found_sig->entry) == -1) 3909 { 3910 ERR("Failed to insert program entry.\n"); 3911 } 3912 return found_sig->idx; 3913 } 3914 3915 static void init_output_registers(const struct wined3d_shader *shader, 3916 const struct wined3d_shader_signature *ps_input_sig, 3917 struct shader_arb_ctx_priv *priv_ctx, struct arb_vs_compiled_shader *compiled) 3918 { 3919 unsigned int i, j; 3920 static const char * const texcoords[8] = 3921 { 3922 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 3923 "result.texcoord[4]", 
"result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]" 3924 }; 3925 /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary 3926 * and varying 9 to result.color.secondary 3927 */ 3928 static const char * const decl_idx_to_string[MAX_REG_INPUT] = 3929 { 3930 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 3931 "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]", 3932 "result.color.primary", "result.color.secondary" 3933 }; 3934 3935 if (!ps_input_sig) 3936 { 3937 TRACE("Pixel shader uses builtin varyings\n"); 3938 /* Map builtins to builtins */ 3939 for(i = 0; i < 8; i++) 3940 { 3941 priv_ctx->texcrd_output[i] = texcoords[i]; 3942 } 3943 priv_ctx->color_output[0] = "result.color.primary"; 3944 priv_ctx->color_output[1] = "result.color.secondary"; 3945 priv_ctx->fog_output = "TMP_FOGCOORD"; 3946 3947 /* Map declared regs to builtins. Use "TA" to /dev/null unread output */ 3948 for (i = 0; i < shader->output_signature.element_count; ++i) 3949 { 3950 const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i]; 3951 3952 if (!output->semantic_name) 3953 continue; 3954 3955 if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION)) 3956 { 3957 TRACE("o%u is TMP_OUT\n", output->register_idx); 3958 if (!output->semantic_idx) 3959 priv_ctx->vs_output[output->register_idx] = "TMP_OUT"; 3960 else 3961 priv_ctx->vs_output[output->register_idx] = "TA"; 3962 } 3963 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE)) 3964 { 3965 TRACE("o%u is result.pointsize\n", output->register_idx); 3966 if (!output->semantic_idx) 3967 priv_ctx->vs_output[output->register_idx] = "result.pointsize"; 3968 else 3969 priv_ctx->vs_output[output->register_idx] = "TA"; 3970 } 3971 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_COLOR)) 3972 { 3973 TRACE("o%u is 
result.color.?, idx %u\n", output->register_idx, output->semantic_idx); 3974 if (!output->semantic_idx) 3975 priv_ctx->vs_output[output->register_idx] = "result.color.primary"; 3976 else if (output->semantic_idx == 1) 3977 priv_ctx->vs_output[output->register_idx] = "result.color.secondary"; 3978 else priv_ctx->vs_output[output->register_idx] = "TA"; 3979 } 3980 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_TEXCOORD)) 3981 { 3982 TRACE("o%u is result.texcoord[%u]\n", output->register_idx, output->semantic_idx); 3983 if (output->semantic_idx >= 8) 3984 priv_ctx->vs_output[output->register_idx] = "TA"; 3985 else 3986 priv_ctx->vs_output[output->register_idx] = texcoords[output->semantic_idx]; 3987 } 3988 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_FOG)) 3989 { 3990 TRACE("o%u is result.fogcoord\n", output->register_idx); 3991 if (output->semantic_idx > 0) 3992 priv_ctx->vs_output[output->register_idx] = "TA"; 3993 else 3994 priv_ctx->vs_output[output->register_idx] = "result.fogcoord"; 3995 } 3996 else 3997 { 3998 priv_ctx->vs_output[output->register_idx] = "TA"; 3999 } 4000 } 4001 return; 4002 } 4003 4004 TRACE("Pixel shader uses declared varyings\n"); 4005 4006 /* Map builtin to declared. /dev/null the results by default to the TA temp reg */ 4007 for(i = 0; i < 8; i++) 4008 { 4009 priv_ctx->texcrd_output[i] = "TA"; 4010 } 4011 priv_ctx->color_output[0] = "TA"; 4012 priv_ctx->color_output[1] = "TA"; 4013 priv_ctx->fog_output = "TA"; 4014 4015 for (i = 0; i < ps_input_sig->element_count; ++i) 4016 { 4017 const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[i]; 4018 4019 if (!input->semantic_name) 4020 continue; 4021 4022 /* If a declared input register is not written by builtin arguments, don't write to it. 
4023 * GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D 4024 * 4025 * Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes 4026 * to TMP_OUT in any case 4027 */ 4028 if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_TEXCOORD)) 4029 { 4030 if (input->semantic_idx < 8) 4031 priv_ctx->texcrd_output[input->semantic_idx] = decl_idx_to_string[input->register_idx]; 4032 } 4033 else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_COLOR)) 4034 { 4035 if (input->semantic_idx < 2) 4036 priv_ctx->color_output[input->semantic_idx] = decl_idx_to_string[input->register_idx]; 4037 } 4038 else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_FOG)) 4039 { 4040 if (!input->semantic_idx) 4041 priv_ctx->fog_output = decl_idx_to_string[input->register_idx]; 4042 } 4043 else 4044 { 4045 continue; 4046 } 4047 4048 if (!strcmp(decl_idx_to_string[input->register_idx], "result.color.primary") 4049 || !strcmp(decl_idx_to_string[input->register_idx], "result.color.secondary")) 4050 { 4051 compiled->need_color_unclamp = TRUE; 4052 } 4053 } 4054 4055 /* Map declared to declared */ 4056 for (i = 0; i < shader->output_signature.element_count; ++i) 4057 { 4058 const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i]; 4059 4060 /* Write unread output to TA to throw them away */ 4061 priv_ctx->vs_output[output->register_idx] = "TA"; 4062 4063 if (!output->semantic_name) 4064 continue; 4065 4066 if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION) && !output->semantic_idx) 4067 { 4068 priv_ctx->vs_output[output->register_idx] = "TMP_OUT"; 4069 continue; 4070 } 4071 else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE) && !output->semantic_idx) 4072 { 4073 priv_ctx->vs_output[output->register_idx] = "result.pointsize"; 4074 continue; 4075 } 4076 4077 for (j = 0; j < 
ps_input_sig->element_count; ++j) 4078 { 4079 const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[j]; 4080 4081 if (!input->semantic_name) 4082 continue; 4083 4084 if (!strcmp(input->semantic_name, output->semantic_name) 4085 && input->semantic_idx == output->semantic_idx) 4086 { 4087 priv_ctx->vs_output[output->register_idx] = decl_idx_to_string[input->register_idx]; 4088 4089 if (!strcmp(priv_ctx->vs_output[output->register_idx], "result.color.primary") 4090 || !strcmp(priv_ctx->vs_output[output->register_idx], "result.color.secondary")) 4091 { 4092 compiled->need_color_unclamp = TRUE; 4093 } 4094 } 4095 } 4096 } 4097 } 4098 4099 /* Context activation is done by the caller. */ 4100 static GLuint shader_arb_generate_vshader(const struct wined3d_shader *shader, 4101 const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer, 4102 const struct arb_vs_compile_args *args, struct arb_vs_compiled_shader *compiled, 4103 const struct wined3d_shader_signature *ps_input_sig) 4104 { 4105 const struct arb_vshader_private *shader_data = shader->backend_data; 4106 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 4107 struct shader_arb_priv *priv = shader->device->shader_priv; 4108 GLuint ret; 4109 DWORD next_local = 0; 4110 struct shader_arb_ctx_priv priv_ctx; 4111 unsigned int i; 4112 4113 memset(&priv_ctx, 0, sizeof(priv_ctx)); 4114 priv_ctx.cur_vs_args = args; 4115 list_init(&priv_ctx.control_frames); 4116 init_output_registers(shader, ps_input_sig, &priv_ctx, compiled); 4117 4118 /* Create the hw ARB shader */ 4119 shader_addline(buffer, "!!ARBvp1.0\n"); 4120 4121 /* Always enable the NV extension if available. Unlike fragment shaders, there is no 4122 * mesurable performance penalty, and we can always make use of it for clipplanes. 
4123 */ 4124 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 4125 { 4126 shader_addline(buffer, "OPTION NV_vertex_program3;\n"); 4127 priv_ctx.target_version = NV3; 4128 shader_addline(buffer, "ADDRESS aL;\n"); 4129 } 4130 else if (gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 4131 { 4132 shader_addline(buffer, "OPTION NV_vertex_program2;\n"); 4133 priv_ctx.target_version = NV2; 4134 shader_addline(buffer, "ADDRESS aL;\n"); 4135 } else { 4136 priv_ctx.target_version = ARB; 4137 } 4138 4139 shader_addline(buffer, "TEMP TMP_OUT;\n"); 4140 if (reg_maps->fog) 4141 shader_addline(buffer, "TEMP TMP_FOGCOORD;\n"); 4142 if (need_helper_const(shader_data, reg_maps, gl_info)) 4143 { 4144 char ftoa_tmp[17]; 4145 wined3d_ftoa(eps, ftoa_tmp); 4146 shader_addline(buffer, "PARAM helper_const = { 0.0, 1.0, 2.0, %s};\n", ftoa_tmp); 4147 } 4148 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) 4149 { 4150 shader_addline(buffer, "PARAM rel_addr_const = { 0.5, %d.0, 0.0, 0.0 };\n", shader_data->rel_offset); 4151 shader_addline(buffer, "TEMP A0_SHADOW;\n"); 4152 } 4153 4154 shader_addline(buffer, "TEMP TA;\n"); 4155 shader_addline(buffer, "TEMP TB;\n"); 4156 4157 /* Base Declarations */ 4158 shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info, 4159 &priv_ctx.vs_clipplanes, &priv_ctx); 4160 4161 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 4162 { 4163 compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED; 4164 if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2) 4165 { 4166 const DWORD *control_values = find_loop_control_values(shader, i); 4167 4168 if(control_values) 4169 { 4170 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i, 4171 control_values[0], control_values[1], control_values[2]); 4172 } 4173 else 4174 { 4175 compiled->int_consts[i] = next_local; 4176 compiled->num_int_consts++; 4177 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++); 4178 } 4179 } 4180 } 4181 4182 /* We need a constant to fixup the 
final position */ 4183 shader_addline(buffer, "PARAM posFixup = program.local[%u];\n", next_local); 4184 compiled->pos_fixup = next_local++; 4185 4186 /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values 4187 * for output parameters. D3D in theory does not do that either, but some applications depend on a 4188 * proper initialization of the secondary color, and programs using the fixed function pipeline without 4189 * a replacement shader depend on the texcoord.w being set properly. 4190 * 4191 * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This 4192 * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So 4193 * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex- 4194 * coords, we have a flag in the opengl caps. Many cards do not require the texcoord being set, and 4195 * this can eat a number of instructions, so skip it unless this cap is set as well 4196 */ 4197 if (!gl_info->supported[NV_VERTEX_PROGRAM]) 4198 { 4199 const char *color_init = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_0001); 4200 shader_addline(buffer, "MOV result.color.secondary, %s;\n", color_init); 4201 4202 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W && !priv->ffp_proj_control) 4203 { 4204 int i; 4205 const char *one = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ONE); 4206 for(i = 0; i < MAX_REG_TEXCRD; i++) 4207 { 4208 if (reg_maps->u.texcoord_mask[i] && reg_maps->u.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) 4209 shader_addline(buffer, "MOV result.texcoord[%u].w, %s\n", i, one); 4210 } 4211 } 4212 } 4213 4214 /* The shader starts with the main function */ 4215 priv_ctx.in_main_func = TRUE; 4216 /* Base Shader Body */ 4217 if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL))) 4218 return -1; 4219 4220 if (!priv_ctx.footer_written) 
vshader_add_footer(&priv_ctx, 4221 shader_data, args, reg_maps, gl_info, buffer); 4222 4223 shader_addline(buffer, "END\n"); 4224 4225 /* TODO: change to resource.glObjectHandle or something like that */ 4226 GL_EXTCALL(glGenProgramsARB(1, &ret)); 4227 4228 TRACE("Creating a hw vertex shader, prg=%d\n", ret); 4229 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret)); 4230 4231 TRACE("Created hw vertex shader, prg=%d\n", ret); 4232 if (!shader_arb_compile(gl_info, GL_VERTEX_PROGRAM_ARB, buffer->buffer)) 4233 return -1; 4234 4235 return ret; 4236 } 4237 4238 /* Context activation is done by the caller. */ 4239 static struct arb_ps_compiled_shader *find_arb_pshader(struct wined3d_shader *shader, 4240 const struct arb_ps_compile_args *args) 4241 { 4242 struct wined3d_device *device = shader->device; 4243 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 4244 const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info; 4245 UINT i; 4246 DWORD new_size; 4247 struct arb_ps_compiled_shader *new_array; 4248 struct wined3d_string_buffer buffer; 4249 struct arb_pshader_private *shader_data; 4250 GLuint ret; 4251 4252 if (!shader->backend_data) 4253 { 4254 struct shader_arb_priv *priv = device->shader_priv; 4255 4256 shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data)); 4257 shader_data = shader->backend_data; 4258 shader_data->clamp_consts = shader->reg_maps.shader_version.major == 1; 4259 4260 if (shader->reg_maps.shader_version.major < 3) 4261 shader_data->input_signature_idx = ~0U; 4262 else 4263 shader_data->input_signature_idx = find_input_signature(priv, &shader->input_signature); 4264 4265 TRACE("Shader got assigned input signature index %u\n", shader_data->input_signature_idx); 4266 4267 if (!d3d_info->vs_clipping) 4268 shader_data->clipplane_emulation = shader_find_free_input_register(&shader->reg_maps, 4269 d3d_info->limits.ffp_blend_stages - 1); 4270 else 4271 shader_data->clipplane_emulation = 
~0U; 4272 } 4273 shader_data = shader->backend_data; 4274 4275 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 4276 * so a linear search is more performant than a hashmap or a binary search 4277 * (cache coherency etc) 4278 */ 4279 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4280 { 4281 if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args))) 4282 return &shader_data->gl_shaders[i]; 4283 } 4284 4285 TRACE("No matching GL shader found, compiling a new shader\n"); 4286 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 4287 if (shader_data->num_gl_shaders) 4288 { 4289 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 4290 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 4291 new_size * sizeof(*shader_data->gl_shaders)); 4292 } else { 4293 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders)); 4294 new_size = 1; 4295 } 4296 4297 if(!new_array) { 4298 ERR("Out of memory\n"); 4299 return 0; 4300 } 4301 shader_data->gl_shaders = new_array; 4302 shader_data->shader_array_size = new_size; 4303 } 4304 4305 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 4306 4307 pixelshader_update_resource_types(shader, args->super.tex_types); 4308 4309 if (!string_buffer_init(&buffer)) 4310 { 4311 ERR("Failed to initialize shader buffer.\n"); 4312 return 0; 4313 } 4314 4315 ret = shader_arb_generate_pshader(shader, gl_info, &buffer, args, 4316 &shader_data->gl_shaders[shader_data->num_gl_shaders]); 4317 string_buffer_free(&buffer); 4318 shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 4319 4320 return &shader_data->gl_shaders[shader_data->num_gl_shaders++]; 4321 } 4322 4323 static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new, 4324 const DWORD use_map, BOOL skip_int) { 4325 if((stored->super.swizzle_map & use_map) != 
new->super.swizzle_map) return FALSE; 4326 if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE; 4327 if(stored->super.fog_src != new->super.fog_src) return FALSE; 4328 if(stored->clip.boolclip_compare != new->clip.boolclip_compare) return FALSE; 4329 if(stored->ps_signature != new->ps_signature) return FALSE; 4330 if(stored->vertex.samplers_compare != new->vertex.samplers_compare) return FALSE; 4331 if(skip_int) return TRUE; 4332 4333 return !memcmp(stored->loop_ctrl, new->loop_ctrl, sizeof(stored->loop_ctrl)); 4334 } 4335 4336 static struct arb_vs_compiled_shader *find_arb_vshader(struct wined3d_shader *shader, 4337 const struct wined3d_gl_info *gl_info, DWORD use_map, const struct arb_vs_compile_args *args, 4338 const struct wined3d_shader_signature *ps_input_sig) 4339 { 4340 UINT i; 4341 DWORD new_size; 4342 struct arb_vs_compiled_shader *new_array; 4343 struct wined3d_string_buffer buffer; 4344 struct arb_vshader_private *shader_data; 4345 GLuint ret; 4346 4347 if (!shader->backend_data) 4348 { 4349 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 4350 4351 shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data)); 4352 shader_data = shader->backend_data; 4353 4354 if ((gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) 4355 && reg_maps->min_rel_offset <= reg_maps->max_rel_offset) 4356 { 4357 if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 127) 4358 { 4359 FIXME("The difference between the minimum and maximum relative offset is > 127.\n"); 4360 FIXME("Which this OpenGL implementation does not support. 
Try using GLSL.\n"); 4361 FIXME("Min: %u, Max: %u.\n", reg_maps->min_rel_offset, reg_maps->max_rel_offset); 4362 } 4363 else if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 63) 4364 shader_data->rel_offset = reg_maps->min_rel_offset + 63; 4365 else if (reg_maps->max_rel_offset > 63) 4366 shader_data->rel_offset = reg_maps->min_rel_offset; 4367 } 4368 } 4369 shader_data = shader->backend_data; 4370 4371 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 4372 * so a linear search is more performant than a hashmap or a binary search 4373 * (cache coherency etc) 4374 */ 4375 for(i = 0; i < shader_data->num_gl_shaders; i++) { 4376 if (vs_args_equal(&shader_data->gl_shaders[i].args, args, 4377 use_map, gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])) 4378 { 4379 return &shader_data->gl_shaders[i]; 4380 } 4381 } 4382 4383 TRACE("No matching GL shader found, compiling a new shader\n"); 4384 4385 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 4386 if (shader_data->num_gl_shaders) 4387 { 4388 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 4389 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 4390 new_size * sizeof(*shader_data->gl_shaders)); 4391 } else { 4392 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders)); 4393 new_size = 1; 4394 } 4395 4396 if(!new_array) { 4397 ERR("Out of memory\n"); 4398 return 0; 4399 } 4400 shader_data->gl_shaders = new_array; 4401 shader_data->shader_array_size = new_size; 4402 } 4403 4404 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 4405 4406 if (!string_buffer_init(&buffer)) 4407 { 4408 ERR("Failed to initialize shader buffer.\n"); 4409 return 0; 4410 } 4411 4412 ret = shader_arb_generate_vshader(shader, gl_info, &buffer, args, 4413 &shader_data->gl_shaders[shader_data->num_gl_shaders], 4414 ps_input_sig); 4415 string_buffer_free(&buffer); 4416 
shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 4417 4418 return &shader_data->gl_shaders[shader_data->num_gl_shaders++]; 4419 } 4420 4421 static void find_arb_ps_compile_args(const struct wined3d_state *state, 4422 const struct wined3d_context *context, const struct wined3d_shader *shader, 4423 struct arb_ps_compile_args *args) 4424 { 4425 const struct wined3d_gl_info *gl_info = context->gl_info; 4426 const struct wined3d_d3d_info *d3d_info = context->d3d_info; 4427 int i; 4428 WORD int_skip; 4429 4430 find_ps_compile_args(state, shader, context->stream_info.position_transformed, &args->super, context); 4431 4432 /* This forces all local boolean constants to 1 to make them stateblock independent */ 4433 args->bools = shader->reg_maps.local_bool_consts; 4434 4435 for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) 4436 { 4437 if (state->ps_consts_b[i]) 4438 args->bools |= ( 1u << i); 4439 } 4440 4441 /* Only enable the clip plane emulation KIL if at least one clipplane is enabled. The KIL instruction 4442 * is quite expensive because it forces the driver to disable early Z discards. 
It is cheaper to 4443 * duplicate the shader than have a no-op KIL instruction in every shader 4444 */ 4445 if (!d3d_info->vs_clipping && use_vs(state) 4446 && state->render_states[WINED3D_RS_CLIPPING] 4447 && state->render_states[WINED3D_RS_CLIPPLANEENABLE]) 4448 args->clip = 1; 4449 else 4450 args->clip = 0; 4451 4452 /* Skip if unused or local, or supported natively */ 4453 int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts; 4454 if (int_skip == 0xffff || gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]) 4455 { 4456 memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 4457 return; 4458 } 4459 4460 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 4461 { 4462 if (int_skip & (1u << i)) 4463 { 4464 args->loop_ctrl[i][0] = 0; 4465 args->loop_ctrl[i][1] = 0; 4466 args->loop_ctrl[i][2] = 0; 4467 } 4468 else 4469 { 4470 args->loop_ctrl[i][0] = state->ps_consts_i[i].x; 4471 args->loop_ctrl[i][1] = state->ps_consts_i[i].y; 4472 args->loop_ctrl[i][2] = state->ps_consts_i[i].z; 4473 } 4474 } 4475 } 4476 4477 static void find_arb_vs_compile_args(const struct wined3d_state *state, 4478 const struct wined3d_context *context, const struct wined3d_shader *shader, 4479 struct arb_vs_compile_args *args) 4480 { 4481 const struct wined3d_device *device = shader->device; 4482 const struct wined3d_adapter *adapter = device->adapter; 4483 const struct wined3d_gl_info *gl_info = context->gl_info; 4484 const struct wined3d_d3d_info *d3d_info = context->d3d_info; 4485 int i; 4486 WORD int_skip; 4487 4488 find_vs_compile_args(state, shader, context->stream_info.swizzle_map, &args->super, context); 4489 4490 args->clip.boolclip_compare = 0; 4491 if (use_ps(state)) 4492 { 4493 const struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 4494 const struct arb_pshader_private *shader_priv = ps->backend_data; 4495 args->ps_signature = shader_priv->input_signature_idx; 4496 4497 args->clip.boolclip.clip_texcoord = shader_priv->clipplane_emulation + 1; 4498 
} 4499 else 4500 { 4501 args->ps_signature = ~0; 4502 if (!d3d_info->vs_clipping && adapter->fragment_pipe == &arbfp_fragment_pipeline) 4503 args->clip.boolclip.clip_texcoord = ffp_clip_emul(context) ? d3d_info->limits.ffp_blend_stages : 0; 4504 /* Otherwise: Setting boolclip_compare set clip_texcoord to 0 */ 4505 } 4506 4507 if (args->clip.boolclip.clip_texcoord) 4508 { 4509 if (state->render_states[WINED3D_RS_CLIPPING]) 4510 args->clip.boolclip.clipplane_mask = (unsigned char)state->render_states[WINED3D_RS_CLIPPLANEENABLE]; 4511 /* clipplane_mask was set to 0 by setting boolclip_compare to 0 */ 4512 } 4513 4514 /* This forces all local boolean constants to 1 to make them stateblock independent */ 4515 args->clip.boolclip.bools = shader->reg_maps.local_bool_consts; 4516 /* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */ 4517 for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) 4518 { 4519 if (state->vs_consts_b[i]) 4520 args->clip.boolclip.bools |= (1u << i); 4521 } 4522 4523 args->vertex.samplers[0] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 0]; 4524 args->vertex.samplers[1] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 1]; 4525 args->vertex.samplers[2] = context->tex_unit_map[MAX_FRAGMENT_SAMPLERS + 2]; 4526 args->vertex.samplers[3] = 0; 4527 4528 /* Skip if unused or local */ 4529 int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts; 4530 /* This is about flow control, not clipping. 
*/ 4531 if (int_skip == 0xffff || gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 4532 { 4533 memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 4534 return; 4535 } 4536 4537 for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) 4538 { 4539 if (int_skip & (1u << i)) 4540 { 4541 args->loop_ctrl[i][0] = 0; 4542 args->loop_ctrl[i][1] = 0; 4543 args->loop_ctrl[i][2] = 0; 4544 } 4545 else 4546 { 4547 args->loop_ctrl[i][0] = state->vs_consts_i[i].x; 4548 args->loop_ctrl[i][1] = state->vs_consts_i[i].y; 4549 args->loop_ctrl[i][2] = state->vs_consts_i[i].z; 4550 } 4551 } 4552 } 4553 4554 /* Context activation is done by the caller. */ 4555 static void shader_arb_select(void *shader_priv, struct wined3d_context *context, 4556 const struct wined3d_state *state) 4557 { 4558 struct shader_arb_priv *priv = shader_priv; 4559 const struct wined3d_gl_info *gl_info = context->gl_info; 4560 int i; 4561 4562 /* Deal with pixel shaders first so the vertex shader arg function has the input signature ready */ 4563 if (use_ps(state)) 4564 { 4565 struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL]; 4566 struct arb_ps_compile_args compile_args; 4567 struct arb_ps_compiled_shader *compiled; 4568 4569 TRACE("Using pixel shader %p.\n", ps); 4570 find_arb_ps_compile_args(state, context, ps, &compile_args); 4571 compiled = find_arb_pshader(ps, &compile_args); 4572 priv->current_fprogram_id = compiled->prgId; 4573 priv->compiled_fprog = compiled; 4574 4575 /* Bind the fragment program */ 4576 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id)); 4577 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);"); 4578 4579 if (!priv->use_arbfp_fixed_func) 4580 priv->fragment_pipe->enable_extension(gl_info, FALSE); 4581 4582 /* Enable OpenGL fragment programs. 
*/ 4583 gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB); 4584 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);"); 4585 4586 TRACE("Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", priv->current_fprogram_id); 4587 4588 /* Pixel Shader 1.x constants are clamped to [-1;1], Pixel Shader 2.0 constants are not. If switching between 4589 * a 1.x and newer shader, reload the first 8 constants 4590 */ 4591 if (priv->last_ps_const_clamped != ((struct arb_pshader_private *)ps->backend_data)->clamp_consts) 4592 { 4593 priv->last_ps_const_clamped = ((struct arb_pshader_private *)ps->backend_data)->clamp_consts; 4594 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, 8); 4595 for(i = 0; i < 8; i++) 4596 { 4597 priv->pshader_const_dirty[i] = 1; 4598 } 4599 /* Also takes care of loading local constants */ 4600 shader_arb_load_constants_internal(shader_priv, context, state, TRUE, FALSE, TRUE); 4601 } 4602 else 4603 { 4604 UINT rt_height = state->fb->render_targets[0]->height; 4605 shader_arb_ps_local_constants(compiled, context, state, rt_height); 4606 } 4607 4608 /* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */ 4609 if (compiled->np2fixup_info.super.active) 4610 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; 4611 4612 if (ps->load_local_constsF) 4613 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F; 4614 } 4615 else 4616 { 4617 if (gl_info->supported[ARB_FRAGMENT_PROGRAM] && !priv->use_arbfp_fixed_func) 4618 { 4619 /* Disable only if we're not using arbfp fixed function fragment 4620 * processing. If this is used, keep GL_FRAGMENT_PROGRAM_ARB 4621 * enabled, and the fixed function pipeline will bind the fixed 4622 * function replacement shader. 
*/ 4623 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 4624 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 4625 priv->current_fprogram_id = 0; 4626 } 4627 priv->fragment_pipe->enable_extension(gl_info, TRUE); 4628 } 4629 4630 if (use_vs(state)) 4631 { 4632 struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; 4633 struct arb_vs_compile_args compile_args; 4634 struct arb_vs_compiled_shader *compiled; 4635 const struct wined3d_shader_signature *ps_input_sig; 4636 4637 TRACE("Using vertex shader %p\n", vs); 4638 find_arb_vs_compile_args(state, context, vs, &compile_args); 4639 4640 /* Instead of searching for the signature in the signature list, read the one from the 4641 * current pixel shader. It's maybe not the shader where the signature came from, but it 4642 * is the same signature and faster to find. */ 4643 if (compile_args.ps_signature == ~0U) 4644 ps_input_sig = NULL; 4645 else 4646 ps_input_sig = &state->shader[WINED3D_SHADER_TYPE_PIXEL]->input_signature; 4647 4648 compiled = find_arb_vshader(vs, context->gl_info, context->stream_info.use_map, 4649 &compile_args, ps_input_sig); 4650 priv->current_vprogram_id = compiled->prgId; 4651 priv->compiled_vprog = compiled; 4652 4653 /* Bind the vertex program */ 4654 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id)); 4655 checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);"); 4656 4657 priv->vertex_pipe->vp_enable(gl_info, FALSE); 4658 4659 /* Enable OpenGL vertex programs */ 4660 gl_info->gl_ops.gl.p_glEnable(GL_VERTEX_PROGRAM_ARB); 4661 checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);"); 4662 TRACE("Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", priv->current_vprogram_id); 4663 shader_arb_vs_local_constants(compiled, context, state); 4664 4665 if(priv->last_vs_color_unclamp != compiled->need_color_unclamp) { 4666 priv->last_vs_color_unclamp = compiled->need_color_unclamp; 4667 4668 if 
(gl_info->supported[ARB_COLOR_BUFFER_FLOAT]) 4669 { 4670 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, !compiled->need_color_unclamp)); 4671 checkGLcall("glClampColorARB"); 4672 } else { 4673 FIXME("vertex color clamp needs to be changed, but extension not supported.\n"); 4674 } 4675 } 4676 4677 if (vs->load_local_constsF) 4678 context->constant_update_mask |= WINED3D_SHADER_CONST_VS_F; 4679 } 4680 else 4681 { 4682 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4683 { 4684 priv->current_vprogram_id = 0; 4685 gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB); 4686 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 4687 } 4688 priv->vertex_pipe->vp_enable(gl_info, TRUE); 4689 } 4690 } 4691 4692 static void shader_arb_select_compute(void *shader_priv, struct wined3d_context *context, 4693 const struct wined3d_state *state) 4694 { 4695 ERR("Compute pipeline not supported by the ARB shader backend.\n"); 4696 } 4697 4698 /* Context activation is done by the caller. */ 4699 static void shader_arb_disable(void *shader_priv, struct wined3d_context *context) 4700 { 4701 const struct wined3d_gl_info *gl_info = context->gl_info; 4702 struct shader_arb_priv *priv = shader_priv; 4703 4704 if (gl_info->supported[ARB_FRAGMENT_PROGRAM]) 4705 { 4706 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 4707 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 4708 priv->current_fprogram_id = 0; 4709 } 4710 priv->fragment_pipe->enable_extension(gl_info, FALSE); 4711 4712 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4713 { 4714 priv->current_vprogram_id = 0; 4715 gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB); 4716 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 4717 } 4718 priv->vertex_pipe->vp_enable(gl_info, FALSE); 4719 4720 if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT] && priv->last_vs_color_unclamp) 4721 { 4722 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, GL_FIXED_ONLY_ARB)); 4723 checkGLcall("glClampColorARB"); 4724 priv->last_vs_color_unclamp = FALSE; 
4725 } 4726 4727 context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL) 4728 | (1u << WINED3D_SHADER_TYPE_VERTEX) 4729 | (1u << WINED3D_SHADER_TYPE_GEOMETRY) 4730 | (1u << WINED3D_SHADER_TYPE_HULL) 4731 | (1u << WINED3D_SHADER_TYPE_DOMAIN) 4732 | (1u << WINED3D_SHADER_TYPE_COMPUTE); 4733 } 4734 4735 static void shader_arb_destroy(struct wined3d_shader *shader) 4736 { 4737 struct wined3d_device *device = shader->device; 4738 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 4739 4740 if (shader_is_pshader_version(shader->reg_maps.shader_version.type)) 4741 { 4742 struct arb_pshader_private *shader_data = shader->backend_data; 4743 UINT i; 4744 4745 if(!shader_data) return; /* This can happen if a shader was never compiled */ 4746 4747 if (shader_data->num_gl_shaders) 4748 { 4749 struct wined3d_context *context = context_acquire(device, NULL, 0); 4750 4751 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4752 { 4753 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 4754 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 4755 } 4756 4757 context_release(context); 4758 } 4759 4760 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders); 4761 HeapFree(GetProcessHeap(), 0, shader_data); 4762 shader->backend_data = NULL; 4763 } 4764 else 4765 { 4766 struct arb_vshader_private *shader_data = shader->backend_data; 4767 UINT i; 4768 4769 if(!shader_data) return; /* This can happen if a shader was never compiled */ 4770 4771 if (shader_data->num_gl_shaders) 4772 { 4773 struct wined3d_context *context = context_acquire(device, NULL, 0); 4774 4775 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4776 { 4777 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 4778 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 4779 } 4780 4781 context_release(context); 4782 } 4783 4784 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders); 4785 
HeapFree(GetProcessHeap(), 0, shader_data); 4786 shader->backend_data = NULL; 4787 } 4788 } 4789 4790 static int sig_tree_compare(const void *key, const struct wine_rb_entry *entry) 4791 { 4792 struct ps_signature *e = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 4793 return compare_sig(key, &e->sig); 4794 } 4795 4796 static HRESULT shader_arb_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe, 4797 const struct fragment_pipeline *fragment_pipe) 4798 { 4799 struct shader_arb_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*priv)); 4800 struct fragment_caps fragment_caps; 4801 void *vertex_priv, *fragment_priv; 4802 const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info; 4803 4804 if (!(vertex_priv = vertex_pipe->vp_alloc(&arb_program_shader_backend, priv))) 4805 { 4806 ERR("Failed to initialize vertex pipe.\n"); 4807 HeapFree(GetProcessHeap(), 0, priv); 4808 return E_FAIL; 4809 } 4810 4811 if (!(fragment_priv = fragment_pipe->alloc_private(&arb_program_shader_backend, priv))) 4812 { 4813 ERR("Failed to initialize fragment pipe.\n"); 4814 vertex_pipe->vp_free(device); 4815 HeapFree(GetProcessHeap(), 0, priv); 4816 return E_FAIL; 4817 } 4818 4819 memset(priv->vshader_const_dirty, 1, 4820 sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count); 4821 memset(priv->pshader_const_dirty, 1, 4822 sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count); 4823 4824 wine_rb_init(&priv->signature_tree, sig_tree_compare); 4825 4826 priv->vertex_pipe = vertex_pipe; 4827 priv->fragment_pipe = fragment_pipe; 4828 fragment_pipe->get_caps(&device->adapter->gl_info, &fragment_caps); 4829 priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL; 4830 4831 device->vertex_priv = vertex_priv; 4832 device->fragment_priv = fragment_priv; 4833 device->shader_priv = priv; 4834 4835 return WINED3D_OK; 4836 } 4837 4838 static void release_signature(struct 
wine_rb_entry *entry, void *context) 4839 { 4840 struct ps_signature *sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 4841 unsigned int i; 4842 4843 for (i = 0; i < sig->sig.element_count; ++i) 4844 { 4845 HeapFree(GetProcessHeap(), 0, (char *)sig->sig.elements[i].semantic_name); 4846 } 4847 HeapFree(GetProcessHeap(), 0, sig->sig.elements); 4848 HeapFree(GetProcessHeap(), 0, sig); 4849 } 4850 4851 /* Context activation is done by the caller. */ 4852 static void shader_arb_free(struct wined3d_device *device) 4853 { 4854 struct shader_arb_priv *priv = device->shader_priv; 4855 4856 wine_rb_destroy(&priv->signature_tree, release_signature, NULL); 4857 priv->fragment_pipe->free_private(device); 4858 priv->vertex_pipe->vp_free(device); 4859 HeapFree(GetProcessHeap(), 0, device->shader_priv); 4860 } 4861 4862 static BOOL shader_arb_allocate_context_data(struct wined3d_context *context) 4863 { 4864 return TRUE; 4865 } 4866 4867 static void shader_arb_free_context_data(struct wined3d_context *context) 4868 { 4869 struct shader_arb_priv *priv; 4870 4871 priv = context->device->shader_priv; 4872 if (priv->last_context == context) 4873 priv->last_context = NULL; 4874 } 4875 4876 static void shader_arb_init_context_state(struct wined3d_context *context) {} 4877 4878 static void shader_arb_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps) 4879 { 4880 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4881 { 4882 DWORD vs_consts; 4883 UINT vs_version; 4884 4885 /* 96 is the minimum allowed value of MAX_PROGRAM_ENV_PARAMETERS_ARB 4886 * for vertex programs. If the native limit is less than that it's 4887 * not very useful, and e.g. Mesa swrast returns 0, probably to 4888 * indicate it's a software implementation. 
*/ 4889 if (gl_info->limits.arb_vs_native_constants < 96) 4890 vs_consts = gl_info->limits.arb_vs_float_constants; 4891 else 4892 vs_consts = min(gl_info->limits.arb_vs_float_constants, gl_info->limits.arb_vs_native_constants); 4893 4894 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 4895 { 4896 vs_version = 3; 4897 TRACE("Hardware vertex shader version 3.0 enabled (NV_VERTEX_PROGRAM3)\n"); 4898 } 4899 else if (vs_consts >= 256) 4900 { 4901 /* Shader Model 2.0 requires at least 256 vertex shader constants */ 4902 vs_version = 2; 4903 TRACE("Hardware vertex shader version 2.0 enabled (ARB_PROGRAM)\n"); 4904 } 4905 else 4906 { 4907 vs_version = 1; 4908 TRACE("Hardware vertex shader version 1.1 enabled (ARB_PROGRAM)\n"); 4909 } 4910 caps->vs_version = min(wined3d_settings.max_sm_vs, vs_version); 4911 caps->vs_uniform_count = min(WINED3D_MAX_VS_CONSTS_F, vs_consts); 4912 } 4913 else 4914 { 4915 caps->vs_version = 0; 4916 caps->vs_uniform_count = 0; 4917 } 4918 4919 caps->hs_version = 0; 4920 caps->ds_version = 0; 4921 caps->gs_version = 0; 4922 caps->cs_version = 0; 4923 4924 if (gl_info->supported[ARB_FRAGMENT_PROGRAM]) 4925 { 4926 DWORD ps_consts; 4927 UINT ps_version; 4928 4929 /* Similar as above for vertex programs, but the minimum for fragment 4930 * programs is 24. 
*/ 4931 if (gl_info->limits.arb_ps_native_constants < 24) 4932 ps_consts = gl_info->limits.arb_ps_float_constants; 4933 else 4934 ps_consts = min(gl_info->limits.arb_ps_float_constants, gl_info->limits.arb_ps_native_constants); 4935 4936 if (gl_info->supported[NV_FRAGMENT_PROGRAM2]) 4937 { 4938 ps_version = 3; 4939 TRACE("Hardware pixel shader version 3.0 enabled (NV_FRAGMENT_PROGRAM2)\n"); 4940 } 4941 else if (ps_consts >= 32) 4942 { 4943 /* Shader Model 2.0 requires at least 32 pixel shader constants */ 4944 ps_version = 2; 4945 TRACE("Hardware pixel shader version 2.0 enabled (ARB_PROGRAM)\n"); 4946 } 4947 else 4948 { 4949 ps_version = 1; 4950 TRACE("Hardware pixel shader version 1.4 enabled (ARB_PROGRAM)\n"); 4951 } 4952 caps->ps_version = min(wined3d_settings.max_sm_ps, ps_version); 4953 caps->ps_uniform_count = min(WINED3D_MAX_PS_CONSTS_F, ps_consts); 4954 caps->ps_1x_max_value = 8.0f; 4955 } 4956 else 4957 { 4958 caps->ps_version = 0; 4959 caps->ps_uniform_count = 0; 4960 caps->ps_1x_max_value = 0.0f; 4961 } 4962 4963 caps->varying_count = 0; 4964 caps->wined3d_caps = WINED3D_SHADER_CAP_SRGB_WRITE; 4965 if (use_nv_clip(gl_info)) 4966 caps->wined3d_caps |= WINED3D_SHADER_CAP_VS_CLIPPING; 4967 } 4968 4969 static BOOL shader_arb_color_fixup_supported(struct color_fixup_desc fixup) 4970 { 4971 /* We support everything except complex conversions. 
*/ 4972 return !is_complex_fixup(fixup); 4973 } 4974 4975 static void shader_arb_add_instruction_modifiers(const struct wined3d_shader_instruction *ins) { 4976 DWORD shift; 4977 char write_mask[20], regstr[50]; 4978 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 4979 BOOL is_color = FALSE; 4980 const struct wined3d_shader_dst_param *dst; 4981 4982 if (!ins->dst_count) return; 4983 4984 dst = &ins->dst[0]; 4985 shift = dst->shift; 4986 if (!shift) return; /* Saturate alone is handled by the instructions */ 4987 4988 shader_arb_get_write_mask(ins, dst, write_mask); 4989 shader_arb_get_register_name(ins, &dst->reg, regstr, &is_color); 4990 4991 /* Generate a line that does the output modifier computation 4992 * FIXME: _SAT vs shift? _SAT alone is already handled in the instructions, if this 4993 * maps problems in e.g. _d4_sat modify shader_arb_get_modifier 4994 */ 4995 shader_addline(buffer, "MUL%s %s%s, %s, %s;\n", shader_arb_get_modifier(ins), 4996 regstr, write_mask, regstr, shift_tab[shift]); 4997 } 4998 4999 static const SHADER_HANDLER shader_arb_instruction_handler_table[WINED3DSIH_TABLE_SIZE] = 5000 { 5001 /* WINED3DSIH_ABS */ shader_hw_map2gl, 5002 /* WINED3DSIH_ADD */ shader_hw_map2gl, 5003 /* WINED3DSIH_AND */ NULL, 5004 /* WINED3DSIH_ATOMIC_AND */ NULL, 5005 /* WINED3DSIH_ATOMIC_CMP_STORE */ NULL, 5006 /* WINED3DSIH_ATOMIC_IADD */ NULL, 5007 /* WINED3DSIH_ATOMIC_IMAX */ NULL, 5008 /* WINED3DSIH_ATOMIC_IMIN */ NULL, 5009 /* WINED3DSIH_ATOMIC_OR */ NULL, 5010 /* WINED3DSIH_ATOMIC_UMAX */ NULL, 5011 /* WINED3DSIH_ATOMIC_UMIN */ NULL, 5012 /* WINED3DSIH_ATOMIC_XOR */ NULL, 5013 /* WINED3DSIH_BEM */ pshader_hw_bem, 5014 /* WINED3DSIH_BFI */ NULL, 5015 /* WINED3DSIH_BFREV */ NULL, 5016 /* WINED3DSIH_BREAK */ shader_hw_break, 5017 /* WINED3DSIH_BREAKC */ shader_hw_breakc, 5018 /* WINED3DSIH_BREAKP */ NULL, 5019 /* WINED3DSIH_BUFINFO */ NULL, 5020 /* WINED3DSIH_CALL */ shader_hw_call, 5021 /* WINED3DSIH_CALLNZ */ NULL, 5022 /* WINED3DSIH_CASE */ NULL, 
5023 /* WINED3DSIH_CMP */ pshader_hw_cmp, 5024 /* WINED3DSIH_CND */ pshader_hw_cnd, 5025 /* WINED3DSIH_CONTINUE */ NULL, 5026 /* WINED3DSIH_CONTINUEP */ NULL, 5027 /* WINED3DSIH_COUNTBITS */ NULL, 5028 /* WINED3DSIH_CRS */ shader_hw_map2gl, 5029 /* WINED3DSIH_CUT */ NULL, 5030 /* WINED3DSIH_CUT_STREAM */ NULL, 5031 /* WINED3DSIH_DCL */ shader_hw_nop, 5032 /* WINED3DSIH_DCL_CONSTANT_BUFFER */ shader_hw_nop, 5033 /* WINED3DSIH_DCL_FUNCTION_BODY */ NULL, 5034 /* WINED3DSIH_DCL_FUNCTION_TABLE */ NULL, 5035 /* WINED3DSIH_DCL_GLOBAL_FLAGS */ NULL, 5036 /* WINED3DSIH_DCL_GS_INSTANCES */ NULL, 5037 /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */ NULL, 5038 /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */ NULL, 5039 /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR */ NULL, 5040 /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER */ NULL, 5041 /* WINED3DSIH_DCL_INDEX_RANGE */ NULL, 5042 /* WINED3DSIH_DCL_INDEXABLE_TEMP */ NULL, 5043 /* WINED3DSIH_DCL_INPUT */ NULL, 5044 /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT */ NULL, 5045 /* WINED3DSIH_DCL_INPUT_PRIMITIVE */ shader_hw_nop, 5046 /* WINED3DSIH_DCL_INPUT_PS */ NULL, 5047 /* WINED3DSIH_DCL_INPUT_PS_SGV */ NULL, 5048 /* WINED3DSIH_DCL_INPUT_PS_SIV */ NULL, 5049 /* WINED3DSIH_DCL_INPUT_SGV */ NULL, 5050 /* WINED3DSIH_DCL_INPUT_SIV */ NULL, 5051 /* WINED3DSIH_DCL_INTERFACE */ NULL, 5052 /* WINED3DSIH_DCL_OUTPUT */ NULL, 5053 /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT */ NULL, 5054 /* WINED3DSIH_DCL_OUTPUT_SIV */ NULL, 5055 /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY */ shader_hw_nop, 5056 /* WINED3DSIH_DCL_RESOURCE_RAW */ NULL, 5057 /* WINED3DSIH_DCL_RESOURCE_STRUCTURED */ NULL, 5058 /* WINED3DSIH_DCL_SAMPLER */ NULL, 5059 /* WINED3DSIH_DCL_STREAM */ NULL, 5060 /* WINED3DSIH_DCL_TEMPS */ NULL, 5061 /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN */ NULL, 5062 /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */ NULL, 5063 /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING */ NULL, 5064 /* WINED3DSIH_DCL_TGSM_RAW */ NULL, 5065 /* WINED3DSIH_DCL_TGSM_STRUCTURED */ NULL, 5066 
/* WINED3DSIH_DCL_THREAD_GROUP */ NULL, 5067 /* WINED3DSIH_DCL_UAV_RAW */ NULL, 5068 /* WINED3DSIH_DCL_UAV_STRUCTURED */ NULL, 5069 /* WINED3DSIH_DCL_UAV_TYPED */ NULL, 5070 /* WINED3DSIH_DCL_VERTICES_OUT */ shader_hw_nop, 5071 /* WINED3DSIH_DEF */ shader_hw_nop, 5072 /* WINED3DSIH_DEFAULT */ NULL, 5073 /* WINED3DSIH_DEFB */ shader_hw_nop, 5074 /* WINED3DSIH_DEFI */ shader_hw_nop, 5075 /* WINED3DSIH_DIV */ NULL, 5076 /* WINED3DSIH_DP2 */ NULL, 5077 /* WINED3DSIH_DP2ADD */ pshader_hw_dp2add, 5078 /* WINED3DSIH_DP3 */ shader_hw_map2gl, 5079 /* WINED3DSIH_DP4 */ shader_hw_map2gl, 5080 /* WINED3DSIH_DST */ shader_hw_map2gl, 5081 /* WINED3DSIH_DSX */ shader_hw_map2gl, 5082 /* WINED3DSIH_DSX_COARSE */ NULL, 5083 /* WINED3DSIH_DSX_FINE */ NULL, 5084 /* WINED3DSIH_DSY */ shader_hw_dsy, 5085 /* WINED3DSIH_DSY_COARSE */ NULL, 5086 /* WINED3DSIH_DSY_FINE */ NULL, 5087 /* WINED3DSIH_EVAL_SAMPLE_INDEX */ NULL, 5088 /* WINED3DSIH_ELSE */ shader_hw_else, 5089 /* WINED3DSIH_EMIT */ NULL, 5090 /* WINED3DSIH_EMIT_STREAM */ NULL, 5091 /* WINED3DSIH_ENDIF */ shader_hw_endif, 5092 /* WINED3DSIH_ENDLOOP */ shader_hw_endloop, 5093 /* WINED3DSIH_ENDREP */ shader_hw_endrep, 5094 /* WINED3DSIH_ENDSWITCH */ NULL, 5095 /* WINED3DSIH_EQ */ NULL, 5096 /* WINED3DSIH_EXP */ shader_hw_scalar_op, 5097 /* WINED3DSIH_EXPP */ shader_hw_scalar_op, 5098 /* WINED3DSIH_F16TOF32 */ NULL, 5099 /* WINED3DSIH_F32TOF16 */ NULL, 5100 /* WINED3DSIH_FCALL */ NULL, 5101 /* WINED3DSIH_FIRSTBIT_HI */ NULL, 5102 /* WINED3DSIH_FIRSTBIT_LO */ NULL, 5103 /* WINED3DSIH_FIRSTBIT_SHI */ NULL, 5104 /* WINED3DSIH_FRC */ shader_hw_map2gl, 5105 /* WINED3DSIH_FTOI */ NULL, 5106 /* WINED3DSIH_FTOU */ NULL, 5107 /* WINED3DSIH_GATHER4 */ NULL, 5108 /* WINED3DSIH_GATHER4_C */ NULL, 5109 /* WINED3DSIH_GATHER4_PO */ NULL, 5110 /* WINED3DSIH_GATHER4_PO_C */ NULL, 5111 /* WINED3DSIH_GE */ NULL, 5112 /* WINED3DSIH_HS_CONTROL_POINT_PHASE */ NULL, 5113 /* WINED3DSIH_HS_DECLS */ NULL, 5114 /* WINED3DSIH_HS_FORK_PHASE */ NULL, 5115 /* 
WINED3DSIH_HS_JOIN_PHASE */ NULL, 5116 /* WINED3DSIH_IADD */ NULL, 5117 /* WINED3DSIH_IBFE */ NULL, 5118 /* WINED3DSIH_IEQ */ NULL, 5119 /* WINED3DSIH_IF */ NULL /* Hardcoded into the shader */, 5120 /* WINED3DSIH_IFC */ shader_hw_ifc, 5121 /* WINED3DSIH_IGE */ NULL, 5122 /* WINED3DSIH_ILT */ NULL, 5123 /* WINED3DSIH_IMAD */ NULL, 5124 /* WINED3DSIH_IMAX */ NULL, 5125 /* WINED3DSIH_IMIN */ NULL, 5126 /* WINED3DSIH_IMM_ATOMIC_ALLOC */ NULL, 5127 /* WINED3DSIH_IMM_ATOMIC_AND */ NULL, 5128 /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH */ NULL, 5129 /* WINED3DSIH_IMM_ATOMIC_CONSUME */ NULL, 5130 /* WINED3DSIH_IMM_ATOMIC_EXCH */ NULL, 5131 /* WINED3DSIH_IMM_ATOMIC_IADD */ NULL, 5132 /* WINED3DSIH_IMM_ATOMIC_IMAX */ NULL, 5133 /* WINED3DSIH_IMM_ATOMIC_IMIN */ NULL, 5134 /* WINED3DSIH_IMM_ATOMIC_OR */ NULL, 5135 /* WINED3DSIH_IMM_ATOMIC_UMAX */ NULL, 5136 /* WINED3DSIH_IMM_ATOMIC_UMIN */ NULL, 5137 /* WINED3DSIH_IMM_ATOMIC_XOR */ NULL, 5138 /* WINED3DSIH_IMUL */ NULL, 5139 /* WINED3DSIH_INE */ NULL, 5140 /* WINED3DSIH_INEG */ NULL, 5141 /* WINED3DSIH_ISHL */ NULL, 5142 /* WINED3DSIH_ISHR */ NULL, 5143 /* WINED3DSIH_ITOF */ NULL, 5144 /* WINED3DSIH_LABEL */ shader_hw_label, 5145 /* WINED3DSIH_LD */ NULL, 5146 /* WINED3DSIH_LD2DMS */ NULL, 5147 /* WINED3DSIH_LD_RAW */ NULL, 5148 /* WINED3DSIH_LD_STRUCTURED */ NULL, 5149 /* WINED3DSIH_LD_UAV_TYPED */ NULL, 5150 /* WINED3DSIH_LIT */ shader_hw_map2gl, 5151 /* WINED3DSIH_LOD */ NULL, 5152 /* WINED3DSIH_LOG */ shader_hw_scalar_op, 5153 /* WINED3DSIH_LOGP */ shader_hw_scalar_op, 5154 /* WINED3DSIH_LOOP */ shader_hw_loop, 5155 /* WINED3DSIH_LRP */ shader_hw_lrp, 5156 /* WINED3DSIH_LT */ NULL, 5157 /* WINED3DSIH_M3x2 */ shader_hw_mnxn, 5158 /* WINED3DSIH_M3x3 */ shader_hw_mnxn, 5159 /* WINED3DSIH_M3x4 */ shader_hw_mnxn, 5160 /* WINED3DSIH_M4x3 */ shader_hw_mnxn, 5161 /* WINED3DSIH_M4x4 */ shader_hw_mnxn, 5162 /* WINED3DSIH_MAD */ shader_hw_map2gl, 5163 /* WINED3DSIH_MAX */ shader_hw_map2gl, 5164 /* WINED3DSIH_MIN */ shader_hw_map2gl, 5165 /* 
WINED3DSIH_MOV */ shader_hw_mov, 5166 /* WINED3DSIH_MOVA */ shader_hw_mov, 5167 /* WINED3DSIH_MOVC */ NULL, 5168 /* WINED3DSIH_MUL */ shader_hw_map2gl, 5169 /* WINED3DSIH_NE */ NULL, 5170 /* WINED3DSIH_NOP */ shader_hw_nop, 5171 /* WINED3DSIH_NOT */ NULL, 5172 /* WINED3DSIH_NRM */ shader_hw_nrm, 5173 /* WINED3DSIH_OR */ NULL, 5174 /* WINED3DSIH_PHASE */ shader_hw_nop, 5175 /* WINED3DSIH_POW */ shader_hw_pow, 5176 /* WINED3DSIH_RCP */ shader_hw_scalar_op, 5177 /* WINED3DSIH_REP */ shader_hw_rep, 5178 /* WINED3DSIH_RESINFO */ NULL, 5179 /* WINED3DSIH_RET */ shader_hw_ret, 5180 /* WINED3DSIH_RETP */ NULL, 5181 /* WINED3DSIH_ROUND_NE */ NULL, 5182 /* WINED3DSIH_ROUND_NI */ NULL, 5183 /* WINED3DSIH_ROUND_PI */ NULL, 5184 /* WINED3DSIH_ROUND_Z */ NULL, 5185 /* WINED3DSIH_RSQ */ shader_hw_scalar_op, 5186 /* WINED3DSIH_SAMPLE */ NULL, 5187 /* WINED3DSIH_SAMPLE_B */ NULL, 5188 /* WINED3DSIH_SAMPLE_C */ NULL, 5189 /* WINED3DSIH_SAMPLE_C_LZ */ NULL, 5190 /* WINED3DSIH_SAMPLE_GRAD */ NULL, 5191 /* WINED3DSIH_SAMPLE_INFO */ NULL, 5192 /* WINED3DSIH_SAMPLE_LOD */ NULL, 5193 /* WINED3DSIH_SAMPLE_POS */ NULL, 5194 /* WINED3DSIH_SETP */ NULL, 5195 /* WINED3DSIH_SGE */ shader_hw_map2gl, 5196 /* WINED3DSIH_SGN */ shader_hw_sgn, 5197 /* WINED3DSIH_SINCOS */ shader_hw_sincos, 5198 /* WINED3DSIH_SLT */ shader_hw_map2gl, 5199 /* WINED3DSIH_SQRT */ NULL, 5200 /* WINED3DSIH_STORE_RAW */ NULL, 5201 /* WINED3DSIH_STORE_STRUCTURED */ NULL, 5202 /* WINED3DSIH_STORE_UAV_TYPED */ NULL, 5203 /* WINED3DSIH_SUB */ shader_hw_map2gl, 5204 /* WINED3DSIH_SWAPC */ NULL, 5205 /* WINED3DSIH_SWITCH */ NULL, 5206 /* WINED3DSIH_SYNC */ NULL, 5207 /* WINED3DSIH_TEX */ pshader_hw_tex, 5208 /* WINED3DSIH_TEXBEM */ pshader_hw_texbem, 5209 /* WINED3DSIH_TEXBEML */ pshader_hw_texbem, 5210 /* WINED3DSIH_TEXCOORD */ pshader_hw_texcoord, 5211 /* WINED3DSIH_TEXDEPTH */ pshader_hw_texdepth, 5212 /* WINED3DSIH_TEXDP3 */ pshader_hw_texdp3, 5213 /* WINED3DSIH_TEXDP3TEX */ pshader_hw_texdp3tex, 5214 /* WINED3DSIH_TEXKILL 
*/ pshader_hw_texkill, 5215 /* WINED3DSIH_TEXLDD */ shader_hw_texldd, 5216 /* WINED3DSIH_TEXLDL */ shader_hw_texldl, 5217 /* WINED3DSIH_TEXM3x2DEPTH */ pshader_hw_texm3x2depth, 5218 /* WINED3DSIH_TEXM3x2PAD */ pshader_hw_texm3x2pad, 5219 /* WINED3DSIH_TEXM3x2TEX */ pshader_hw_texm3x2tex, 5220 /* WINED3DSIH_TEXM3x3 */ pshader_hw_texm3x3, 5221 /* WINED3DSIH_TEXM3x3DIFF */ NULL, 5222 /* WINED3DSIH_TEXM3x3PAD */ pshader_hw_texm3x3pad, 5223 /* WINED3DSIH_TEXM3x3SPEC */ pshader_hw_texm3x3spec, 5224 /* WINED3DSIH_TEXM3x3TEX */ pshader_hw_texm3x3tex, 5225 /* WINED3DSIH_TEXM3x3VSPEC */ pshader_hw_texm3x3vspec, 5226 /* WINED3DSIH_TEXREG2AR */ pshader_hw_texreg2ar, 5227 /* WINED3DSIH_TEXREG2GB */ pshader_hw_texreg2gb, 5228 /* WINED3DSIH_TEXREG2RGB */ pshader_hw_texreg2rgb, 5229 /* WINED3DSIH_UBFE */ NULL, 5230 /* WINED3DSIH_UDIV */ NULL, 5231 /* WINED3DSIH_UGE */ NULL, 5232 /* WINED3DSIH_ULT */ NULL, 5233 /* WINED3DSIH_UMAX */ NULL, 5234 /* WINED3DSIH_UMIN */ NULL, 5235 /* WINED3DSIH_UMUL */ NULL, 5236 /* WINED3DSIH_USHR */ NULL, 5237 /* WINED3DSIH_UTOF */ NULL, 5238 /* WINED3DSIH_XOR */ NULL, 5239 }; 5240 5241 static BOOL get_bool_const(const struct wined3d_shader_instruction *ins, 5242 const struct wined3d_shader *shader, DWORD idx) 5243 { 5244 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 5245 BOOL vshader = shader_is_vshader_version(reg_maps->shader_version.type); 5246 const struct wined3d_shader_lconst *constant; 5247 WORD bools = 0; 5248 WORD flag = (1u << idx); 5249 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5250 5251 if (reg_maps->local_bool_consts & flag) 5252 { 5253 /* What good is an if(bool) with a hardcoded local constant? 
I don't know, but handle it */ 5254 LIST_FOR_EACH_ENTRY(constant, &shader->constantsB, struct wined3d_shader_lconst, entry) 5255 { 5256 if (constant->idx == idx) 5257 { 5258 return constant->value[0]; 5259 } 5260 } 5261 ERR("Local constant not found\n"); 5262 return FALSE; 5263 } 5264 else 5265 { 5266 if(vshader) bools = priv->cur_vs_args->clip.boolclip.bools; 5267 else bools = priv->cur_ps_args->bools; 5268 return bools & flag; 5269 } 5270 } 5271 5272 static void get_loop_control_const(const struct wined3d_shader_instruction *ins, 5273 const struct wined3d_shader *shader, UINT idx, struct wined3d_shader_loop_control *loop_control) 5274 { 5275 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 5276 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5277 5278 /* Integer constants can either be a local constant, or they can be stored in the shader 5279 * type specific compile args. */ 5280 if (reg_maps->local_int_consts & (1u << idx)) 5281 { 5282 const struct wined3d_shader_lconst *constant; 5283 5284 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 5285 { 5286 if (constant->idx == idx) 5287 { 5288 loop_control->count = constant->value[0]; 5289 loop_control->start = constant->value[1]; 5290 /* Step is signed. */ 5291 loop_control->step = (int)constant->value[2]; 5292 return; 5293 } 5294 } 5295 /* If this happens the flag was set incorrectly */ 5296 ERR("Local constant not found\n"); 5297 loop_control->count = 0; 5298 loop_control->start = 0; 5299 loop_control->step = 0; 5300 return; 5301 } 5302 5303 switch (reg_maps->shader_version.type) 5304 { 5305 case WINED3D_SHADER_TYPE_VERTEX: 5306 /* Count and aL start value are unsigned */ 5307 loop_control->count = priv->cur_vs_args->loop_ctrl[idx][0]; 5308 loop_control->start = priv->cur_vs_args->loop_ctrl[idx][1]; 5309 /* Step is signed. 
*/ 5310 loop_control->step = ((char)priv->cur_vs_args->loop_ctrl[idx][2]); 5311 break; 5312 5313 case WINED3D_SHADER_TYPE_PIXEL: 5314 loop_control->count = priv->cur_ps_args->loop_ctrl[idx][0]; 5315 loop_control->start = priv->cur_ps_args->loop_ctrl[idx][1]; 5316 loop_control->step = ((char)priv->cur_ps_args->loop_ctrl[idx][2]); 5317 break; 5318 5319 default: 5320 FIXME("Unhandled shader type %#x.\n", reg_maps->shader_version.type); 5321 break; 5322 } 5323 } 5324 5325 static void record_instruction(struct list *list, const struct wined3d_shader_instruction *ins) 5326 { 5327 unsigned int i; 5328 struct wined3d_shader_dst_param *dst_param; 5329 struct wined3d_shader_src_param *src_param = NULL, *rel_addr; 5330 struct recorded_instruction *rec = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*rec)); 5331 if(!rec) 5332 { 5333 ERR("Out of memory\n"); 5334 return; 5335 } 5336 5337 rec->ins = *ins; 5338 dst_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*dst_param)); 5339 if(!dst_param) goto free; 5340 *dst_param = *ins->dst; 5341 if (ins->dst->reg.idx[0].rel_addr) 5342 { 5343 rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*rel_addr)); 5344 if (!rel_addr) 5345 goto free; 5346 *rel_addr = *ins->dst->reg.idx[0].rel_addr; 5347 dst_param->reg.idx[0].rel_addr = rel_addr; 5348 } 5349 rec->ins.dst = dst_param; 5350 5351 if (!(src_param = wined3d_calloc(ins->src_count, sizeof(*src_param)))) 5352 goto free; 5353 for (i = 0; i < ins->src_count; ++i) 5354 { 5355 src_param[i] = ins->src[i]; 5356 if (ins->src[i].reg.idx[0].rel_addr) 5357 { 5358 rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*rel_addr)); 5359 if (!rel_addr) 5360 goto free; 5361 *rel_addr = *ins->src[i].reg.idx[0].rel_addr; 5362 src_param[i].reg.idx[0].rel_addr = rel_addr; 5363 } 5364 } 5365 rec->ins.src = src_param; 5366 list_add_tail(list, &rec->entry); 5367 return; 5368 5369 free: 5370 ERR("Out of memory\n"); 5371 if(dst_param) 5372 { 5373 HeapFree(GetProcessHeap(), 0, (void 
*)dst_param->reg.idx[0].rel_addr); 5374 HeapFree(GetProcessHeap(), 0, dst_param); 5375 } 5376 if(src_param) 5377 { 5378 for(i = 0; i < ins->src_count; i++) 5379 { 5380 HeapFree(GetProcessHeap(), 0, (void *)src_param[i].reg.idx[0].rel_addr); 5381 } 5382 HeapFree(GetProcessHeap(), 0, src_param); 5383 } 5384 HeapFree(GetProcessHeap(), 0, rec); 5385 } 5386 5387 static void free_recorded_instruction(struct list *list) 5388 { 5389 struct recorded_instruction *rec_ins, *entry2; 5390 unsigned int i; 5391 5392 LIST_FOR_EACH_ENTRY_SAFE(rec_ins, entry2, list, struct recorded_instruction, entry) 5393 { 5394 list_remove(&rec_ins->entry); 5395 if (rec_ins->ins.dst) 5396 { 5397 HeapFree(GetProcessHeap(), 0, (void *)rec_ins->ins.dst->reg.idx[0].rel_addr); 5398 HeapFree(GetProcessHeap(), 0, (void *)rec_ins->ins.dst); 5399 } 5400 if (rec_ins->ins.src) 5401 { 5402 for (i = 0; i < rec_ins->ins.src_count; ++i) 5403 { 5404 HeapFree(GetProcessHeap(), 0, (void *)rec_ins->ins.src[i].reg.idx[0].rel_addr); 5405 } 5406 HeapFree(GetProcessHeap(), 0, (void *)rec_ins->ins.src); 5407 } 5408 HeapFree(GetProcessHeap(), 0, rec_ins); 5409 } 5410 } 5411 5412 static void pop_control_frame(const struct wined3d_shader_instruction *ins) 5413 { 5414 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5415 struct control_frame *control_frame; 5416 5417 if (ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 5418 { 5419 struct list *e = list_head(&priv->control_frames); 5420 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5421 list_remove(&control_frame->entry); 5422 HeapFree(GetProcessHeap(), 0, control_frame); 5423 priv->loop_depth--; 5424 } 5425 else if (ins->handler_idx == WINED3DSIH_ENDIF) 5426 { 5427 /* Non-ifc ENDIFs were already handled previously. 
*/ 5428 struct list *e = list_head(&priv->control_frames); 5429 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5430 list_remove(&control_frame->entry); 5431 HeapFree(GetProcessHeap(), 0, control_frame); 5432 } 5433 } 5434 5435 static void shader_arb_handle_instruction(const struct wined3d_shader_instruction *ins) { 5436 SHADER_HANDLER hw_fct; 5437 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5438 const struct wined3d_shader *shader = ins->ctx->shader; 5439 struct control_frame *control_frame; 5440 struct wined3d_string_buffer *buffer = ins->ctx->buffer; 5441 BOOL bool_const; 5442 5443 if(ins->handler_idx == WINED3DSIH_LOOP || ins->handler_idx == WINED3DSIH_REP) 5444 { 5445 control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame)); 5446 list_add_head(&priv->control_frames, &control_frame->entry); 5447 5448 if(ins->handler_idx == WINED3DSIH_LOOP) control_frame->type = LOOP; 5449 if(ins->handler_idx == WINED3DSIH_REP) control_frame->type = REP; 5450 5451 if(priv->target_version >= NV2) 5452 { 5453 control_frame->no.loop = priv->num_loops++; 5454 priv->loop_depth++; 5455 } 5456 else 5457 { 5458 /* Don't bother recording when we're in a not used if branch */ 5459 if(priv->muted) 5460 { 5461 return; 5462 } 5463 5464 if(!priv->recording) 5465 { 5466 list_init(&priv->record); 5467 priv->recording = TRUE; 5468 control_frame->outer_loop = TRUE; 5469 get_loop_control_const(ins, shader, ins->src[0].reg.idx[0].offset, &control_frame->loop_control); 5470 return; /* Instruction is handled */ 5471 } 5472 /* Record this loop in the outer loop's recording */ 5473 } 5474 } 5475 else if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 5476 { 5477 if(priv->target_version >= NV2) 5478 { 5479 /* Nothing to do. 
The control frame is popped after the HW instr handler */ 5480 } 5481 else 5482 { 5483 struct list *e = list_head(&priv->control_frames); 5484 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5485 list_remove(&control_frame->entry); 5486 5487 if(control_frame->outer_loop) 5488 { 5489 unsigned int iteration; 5490 int aL = 0; 5491 struct list copy; 5492 5493 /* Turn off recording before playback */ 5494 priv->recording = FALSE; 5495 5496 /* Move the recorded instructions to a separate list and get them out of the private data 5497 * structure. If there are nested loops, the shader_arb_handle_instruction below will 5498 * be recorded again, thus priv->record might be overwritten 5499 */ 5500 list_init(©); 5501 list_move_tail(©, &priv->record); 5502 list_init(&priv->record); 5503 5504 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5505 { 5506 shader_addline(buffer, "#unrolling loop: %u iterations, aL=%u, inc %d\n", 5507 control_frame->loop_control.count, control_frame->loop_control.start, 5508 control_frame->loop_control.step); 5509 aL = control_frame->loop_control.start; 5510 } 5511 else 5512 { 5513 shader_addline(buffer, "#unrolling rep: %u iterations\n", control_frame->loop_control.count); 5514 } 5515 5516 for (iteration = 0; iteration < control_frame->loop_control.count; ++iteration) 5517 { 5518 struct recorded_instruction *rec_ins; 5519 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5520 { 5521 priv->aL = aL; 5522 shader_addline(buffer, "#Iteration %u, aL=%d\n", iteration, aL); 5523 } 5524 else 5525 { 5526 shader_addline(buffer, "#Iteration %u\n", iteration); 5527 } 5528 5529 LIST_FOR_EACH_ENTRY(rec_ins, ©, struct recorded_instruction, entry) 5530 { 5531 shader_arb_handle_instruction(&rec_ins->ins); 5532 } 5533 5534 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5535 { 5536 aL += control_frame->loop_control.step; 5537 } 5538 } 5539 shader_addline(buffer, "#end loop/rep\n"); 5540 5541 free_recorded_instruction(©); 5542 HeapFree(GetProcessHeap(), 0, control_frame); 
5543 return; /* Instruction is handled */ 5544 } 5545 else 5546 { 5547 /* This is a nested loop. Proceed to the normal recording function */ 5548 HeapFree(GetProcessHeap(), 0, control_frame); 5549 } 5550 } 5551 } 5552 5553 if(priv->recording) 5554 { 5555 record_instruction(&priv->record, ins); 5556 return; 5557 } 5558 5559 /* boolean if */ 5560 if(ins->handler_idx == WINED3DSIH_IF) 5561 { 5562 control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame)); 5563 list_add_head(&priv->control_frames, &control_frame->entry); 5564 control_frame->type = IF; 5565 5566 bool_const = get_bool_const(ins, shader, ins->src[0].reg.idx[0].offset); 5567 if (ins->src[0].modifiers == WINED3DSPSM_NOT) 5568 bool_const = !bool_const; 5569 if (!priv->muted && !bool_const) 5570 { 5571 shader_addline(buffer, "#if(FALSE){\n"); 5572 priv->muted = TRUE; 5573 control_frame->muting = TRUE; 5574 } 5575 else shader_addline(buffer, "#if(TRUE) {\n"); 5576 5577 return; /* Instruction is handled */ 5578 } 5579 else if(ins->handler_idx == WINED3DSIH_IFC) 5580 { 5581 /* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */ 5582 control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame)); 5583 control_frame->type = IFC; 5584 control_frame->no.ifc = priv->num_ifcs++; 5585 list_add_head(&priv->control_frames, &control_frame->entry); 5586 } 5587 else if(ins->handler_idx == WINED3DSIH_ELSE) 5588 { 5589 struct list *e = list_head(&priv->control_frames); 5590 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5591 5592 if(control_frame->type == IF) 5593 { 5594 shader_addline(buffer, "#} else {\n"); 5595 if(!priv->muted && !control_frame->muting) 5596 { 5597 priv->muted = TRUE; 5598 control_frame->muting = TRUE; 5599 } 5600 else if(control_frame->muting) priv->muted = FALSE; 5601 return; /* Instruction is handled. 
*/ 5602 } 5603 /* In case of an ifc, generate a HW shader instruction */ 5604 if (control_frame->type != IFC) 5605 ERR("Control frame does not match.\n"); 5606 } 5607 else if(ins->handler_idx == WINED3DSIH_ENDIF) 5608 { 5609 struct list *e = list_head(&priv->control_frames); 5610 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5611 5612 if(control_frame->type == IF) 5613 { 5614 shader_addline(buffer, "#} endif\n"); 5615 if(control_frame->muting) priv->muted = FALSE; 5616 list_remove(&control_frame->entry); 5617 HeapFree(GetProcessHeap(), 0, control_frame); 5618 return; /* Instruction is handled */ 5619 } 5620 /* In case of an ifc, generate a HW shader instruction */ 5621 if (control_frame->type != IFC) 5622 ERR("Control frame does not match.\n"); 5623 } 5624 5625 if(priv->muted) 5626 { 5627 pop_control_frame(ins); 5628 return; 5629 } 5630 5631 /* Select handler */ 5632 hw_fct = shader_arb_instruction_handler_table[ins->handler_idx]; 5633 5634 /* Unhandled opcode */ 5635 if (!hw_fct) 5636 { 5637 FIXME("Backend can't handle opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); 5638 return; 5639 } 5640 hw_fct(ins); 5641 5642 pop_control_frame(ins); 5643 5644 shader_arb_add_instruction_modifiers(ins); 5645 } 5646 5647 static BOOL shader_arb_has_ffp_proj_control(void *shader_priv) 5648 { 5649 struct shader_arb_priv *priv = shader_priv; 5650 5651 return priv->ffp_proj_control; 5652 } 5653 5654 static void shader_arb_precompile(void *shader_priv, struct wined3d_shader *shader) {} 5655 5656 const struct wined3d_shader_backend_ops arb_program_shader_backend = 5657 { 5658 shader_arb_handle_instruction, 5659 shader_arb_precompile, 5660 shader_arb_select, 5661 shader_arb_select_compute, 5662 shader_arb_disable, 5663 shader_arb_update_float_vertex_constants, 5664 shader_arb_update_float_pixel_constants, 5665 shader_arb_load_constants, 5666 shader_arb_destroy, 5667 shader_arb_alloc, 5668 shader_arb_free, 5669 shader_arb_allocate_context_data, 5670 
shader_arb_free_context_data, 5671 shader_arb_init_context_state, 5672 shader_arb_get_caps, 5673 shader_arb_color_fixup_supported, 5674 shader_arb_has_ffp_proj_control, 5675 }; 5676 5677 /* ARB_fragment_program fixed function pipeline replacement definitions */ 5678 #define ARB_FFP_CONST_TFACTOR 0 5679 #define ARB_FFP_CONST_COLOR_KEY_LOW ((ARB_FFP_CONST_TFACTOR) + 1) 5680 #define ARB_FFP_CONST_COLOR_KEY_HIGH ((ARB_FFP_CONST_COLOR_KEY_LOW) + 1) 5681 #define ARB_FFP_CONST_SPECULAR_ENABLE ((ARB_FFP_CONST_COLOR_KEY_HIGH) + 1) 5682 #define ARB_FFP_CONST_CONSTANT(i) ((ARB_FFP_CONST_SPECULAR_ENABLE) + 1 + i) 5683 #define ARB_FFP_CONST_BUMPMAT(i) ((ARB_FFP_CONST_CONSTANT(7)) + 1 + i) 5684 #define ARB_FFP_CONST_LUMINANCE(i) ((ARB_FFP_CONST_BUMPMAT(7)) + 1 + i) 5685 5686 struct arbfp_ffp_desc 5687 { 5688 struct ffp_frag_desc parent; 5689 GLuint shader; 5690 }; 5691 5692 /* Context activation is done by the caller. */ 5693 static void arbfp_enable(const struct wined3d_gl_info *gl_info, BOOL enable) 5694 { 5695 if (enable) 5696 { 5697 gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB); 5698 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 5699 } 5700 else 5701 { 5702 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 5703 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 5704 } 5705 } 5706 5707 static void *arbfp_alloc(const struct wined3d_shader_backend_ops *shader_backend, void *shader_priv) 5708 { 5709 struct shader_arb_priv *priv; 5710 5711 /* Share private data between the shader backend and the pipeline 5712 * replacement, if both are the arb implementation. This is needed to 5713 * figure out whether ARBfp should be disabled if no pixel shader is bound 5714 * or not. 
*/ 5715 if (shader_backend == &arb_program_shader_backend) 5716 priv = shader_priv; 5717 else if (!(priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*priv)))) 5718 return NULL; 5719 5720 wine_rb_init(&priv->fragment_shaders, wined3d_ffp_frag_program_key_compare); 5721 priv->use_arbfp_fixed_func = TRUE; 5722 5723 return priv; 5724 } 5725 5726 /* Context activation is done by the caller. */ 5727 static void arbfp_free_ffpshader(struct wine_rb_entry *entry, void *context) 5728 { 5729 const struct wined3d_gl_info *gl_info = context; 5730 struct arbfp_ffp_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_ffp_desc, parent.entry); 5731 5732 GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader)); 5733 checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)"); 5734 HeapFree(GetProcessHeap(), 0, entry_arb); 5735 } 5736 5737 /* Context activation is done by the caller. */ 5738 static void arbfp_free(struct wined3d_device *device) 5739 { 5740 struct shader_arb_priv *priv = device->fragment_priv; 5741 5742 wine_rb_destroy(&priv->fragment_shaders, arbfp_free_ffpshader, &device->adapter->gl_info); 5743 priv->use_arbfp_fixed_func = FALSE; 5744 5745 if (device->shader_backend != &arb_program_shader_backend) 5746 { 5747 HeapFree(GetProcessHeap(), 0, device->fragment_priv); 5748 } 5749 } 5750 5751 static void arbfp_get_caps(const struct wined3d_gl_info *gl_info, struct fragment_caps *caps) 5752 { 5753 caps->wined3d_caps = WINED3D_FRAGMENT_CAP_PROJ_CONTROL 5754 | WINED3D_FRAGMENT_CAP_SRGB_WRITE 5755 | WINED3D_FRAGMENT_CAP_COLOR_KEY; 5756 caps->PrimitiveMiscCaps = WINED3DPMISCCAPS_TSSARGTEMP; 5757 caps->TextureOpCaps = WINED3DTEXOPCAPS_DISABLE | 5758 WINED3DTEXOPCAPS_SELECTARG1 | 5759 WINED3DTEXOPCAPS_SELECTARG2 | 5760 WINED3DTEXOPCAPS_MODULATE4X | 5761 WINED3DTEXOPCAPS_MODULATE2X | 5762 WINED3DTEXOPCAPS_MODULATE | 5763 WINED3DTEXOPCAPS_ADDSIGNED2X | 5764 WINED3DTEXOPCAPS_ADDSIGNED | 5765 WINED3DTEXOPCAPS_ADD | 5766 WINED3DTEXOPCAPS_SUBTRACT | 5767 
WINED3DTEXOPCAPS_ADDSMOOTH | 5768 WINED3DTEXOPCAPS_BLENDCURRENTALPHA | 5769 WINED3DTEXOPCAPS_BLENDFACTORALPHA | 5770 WINED3DTEXOPCAPS_BLENDTEXTUREALPHA | 5771 WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA | 5772 WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM | 5773 WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR | 5774 WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA | 5775 WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA | 5776 WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR | 5777 WINED3DTEXOPCAPS_DOTPRODUCT3 | 5778 WINED3DTEXOPCAPS_MULTIPLYADD | 5779 WINED3DTEXOPCAPS_LERP | 5780 WINED3DTEXOPCAPS_BUMPENVMAP | 5781 WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE; 5782 5783 /* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */ 5784 5785 caps->MaxTextureBlendStages = MAX_TEXTURES; 5786 caps->MaxSimultaneousTextures = min(gl_info->limits.samplers[WINED3D_SHADER_TYPE_PIXEL], MAX_TEXTURES); 5787 } 5788 5789 static DWORD arbfp_get_emul_mask(const struct wined3d_gl_info *gl_info) 5790 { 5791 return GL_EXT_EMUL_ARB_MULTITEXTURE | GL_EXT_EMUL_EXT_FOG_COORD; 5792 } 5793 5794 static void state_texfactor_arbfp(struct wined3d_context *context, 5795 const struct wined3d_state *state, DWORD state_id) 5796 { 5797 const struct wined3d_gl_info *gl_info = context->gl_info; 5798 struct wined3d_device *device = context->device; 5799 struct wined3d_color color; 5800 5801 if (device->shader_backend == &arb_program_shader_backend) 5802 { 5803 struct shader_arb_priv *priv; 5804 5805 /* Don't load the parameter if we're using an arbfp pixel shader, 5806 * otherwise we'll overwrite application provided constants. 
*/ 5807 if (use_ps(state)) 5808 return; 5809 5810 priv = device->shader_priv; 5811 priv->pshader_const_dirty[ARB_FFP_CONST_TFACTOR] = 1; 5812 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_TFACTOR + 1); 5813 } 5814 5815 wined3d_color_from_d3dcolor(&color, state->render_states[WINED3D_RS_TEXTUREFACTOR]); 5816 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r)); 5817 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r)"); 5818 } 5819 5820 static void state_tss_constant_arbfp(struct wined3d_context *context, 5821 const struct wined3d_state *state, DWORD state_id) 5822 { 5823 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5824 const struct wined3d_gl_info *gl_info = context->gl_info; 5825 struct wined3d_device *device = context->device; 5826 struct wined3d_color color; 5827 5828 if (device->shader_backend == &arb_program_shader_backend) 5829 { 5830 struct shader_arb_priv *priv; 5831 5832 /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite 5833 * application provided constants. 
5834 */ 5835 if (use_ps(state)) 5836 return; 5837 5838 priv = device->shader_priv; 5839 priv->pshader_const_dirty[ARB_FFP_CONST_CONSTANT(stage)] = 1; 5840 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_CONSTANT(stage) + 1); 5841 } 5842 5843 wined3d_color_from_d3dcolor(&color, state->texture_states[stage][WINED3D_TSS_CONSTANT]); 5844 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_CONSTANT(stage), &color.r)); 5845 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_CONSTANT(stage), &color.r)"); 5846 } 5847 5848 static void state_arb_specularenable(struct wined3d_context *context, 5849 const struct wined3d_state *state, DWORD state_id) 5850 { 5851 const struct wined3d_gl_info *gl_info = context->gl_info; 5852 struct wined3d_device *device = context->device; 5853 float col[4]; 5854 5855 if (device->shader_backend == &arb_program_shader_backend) 5856 { 5857 struct shader_arb_priv *priv; 5858 5859 /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite 5860 * application provided constants. 
5861 */ 5862 if (use_ps(state)) 5863 return; 5864 5865 priv = device->shader_priv; 5866 priv->pshader_const_dirty[ARB_FFP_CONST_SPECULAR_ENABLE] = 1; 5867 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_SPECULAR_ENABLE + 1); 5868 } 5869 5870 if (state->render_states[WINED3D_RS_SPECULARENABLE]) 5871 { 5872 /* The specular color has no alpha */ 5873 col[0] = 1.0f; col[1] = 1.0f; 5874 col[2] = 1.0f; col[3] = 0.0f; 5875 } else { 5876 col[0] = 0.0f; col[1] = 0.0f; 5877 col[2] = 0.0f; col[3] = 0.0f; 5878 } 5879 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)); 5880 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)"); 5881 } 5882 5883 static void set_bumpmat_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 5884 { 5885 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5886 const struct wined3d_gl_info *gl_info = context->gl_info; 5887 struct wined3d_device *device = context->device; 5888 float mat[2][2]; 5889 5890 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV; 5891 5892 if (device->shader_backend == &arb_program_shader_backend) 5893 { 5894 struct shader_arb_priv *priv = device->shader_priv; 5895 5896 /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants. 
*/ 5897 if (use_ps(state)) 5898 return; 5899 5900 priv->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1; 5901 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1); 5902 } 5903 5904 mat[0][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT00]); 5905 mat[0][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT01]); 5906 mat[1][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT10]); 5907 mat[1][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT11]); 5908 5909 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])); 5910 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])"); 5911 } 5912 5913 static void tex_bumpenvlum_arbfp(struct wined3d_context *context, 5914 const struct wined3d_state *state, DWORD state_id) 5915 { 5916 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5917 const struct wined3d_gl_info *gl_info = context->gl_info; 5918 struct wined3d_device *device = context->device; 5919 float param[4]; 5920 5921 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV; 5922 5923 if (device->shader_backend == &arb_program_shader_backend) 5924 { 5925 struct shader_arb_priv *priv = device->shader_priv; 5926 5927 /* Exit now, don't set the luminance below, otherwise we may overwrite pixel shader constants. 
*/ 5928 if (use_ps(state)) 5929 return; 5930 5931 priv->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1; 5932 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1); 5933 } 5934 5935 param[0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LSCALE]); 5936 param[1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LOFFSET]); 5937 param[2] = 0.0f; 5938 param[3] = 0.0f; 5939 5940 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)); 5941 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)"); 5942 } 5943 5944 static void alpha_test_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 5945 { 5946 const struct wined3d_gl_info *gl_info = context->gl_info; 5947 int glParm; 5948 float ref; 5949 5950 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 5951 5952 if (state->render_states[WINED3D_RS_ALPHATESTENABLE]) 5953 { 5954 gl_info->gl_ops.gl.p_glEnable(GL_ALPHA_TEST); 5955 checkGLcall("glEnable GL_ALPHA_TEST"); 5956 } 5957 else 5958 { 5959 gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST); 5960 checkGLcall("glDisable GL_ALPHA_TEST"); 5961 return; 5962 } 5963 5964 ref = ((float)state->render_states[WINED3D_RS_ALPHAREF]) / 255.0f; 5965 glParm = wined3d_gl_compare_func(state->render_states[WINED3D_RS_ALPHAFUNC]); 5966 5967 if (glParm) 5968 { 5969 gl_info->gl_ops.gl.p_glAlphaFunc(glParm, ref); 5970 checkGLcall("glAlphaFunc"); 5971 } 5972 } 5973 5974 static void color_key_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 5975 { 5976 const struct wined3d_texture *texture = state->textures[0]; 5977 const struct wined3d_gl_info *gl_info = context->gl_info; 5978 struct wined3d_device *device = context->device; 5979 struct wined3d_color float_key[2]; 5980 5981 if (!texture) 5982 return; 5983 5984 if 
(device->shader_backend == &arb_program_shader_backend) 5985 { 5986 struct shader_arb_priv *priv; 5987 5988 /* Don't load the parameter if we're using an arbfp pixel shader, 5989 * otherwise we'll overwrite application provided constants. */ 5990 if (use_ps(state)) 5991 return; 5992 5993 priv = device->shader_priv; 5994 priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_LOW] = 1; 5995 priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_HIGH] = 1; 5996 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_COLOR_KEY_HIGH + 1); 5997 } 5998 5999 wined3d_format_get_float_color_key(texture->resource.format, &texture->async.src_blt_color_key, float_key); 6000 6001 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r)); 6002 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r)"); 6003 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r)); 6004 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r)"); 6005 } 6006 6007 static const char *get_argreg(struct wined3d_string_buffer *buffer, DWORD argnum, unsigned int stage, DWORD arg) 6008 { 6009 const char *ret; 6010 6011 if(arg == ARG_UNUSED) return "unused"; /* This is the marker for unused registers */ 6012 6013 switch(arg & WINED3DTA_SELECTMASK) { 6014 case WINED3DTA_DIFFUSE: 6015 ret = "fragment.color.primary"; break; 6016 6017 case WINED3DTA_CURRENT: 6018 ret = "ret"; 6019 break; 6020 6021 case WINED3DTA_TEXTURE: 6022 switch(stage) { 6023 case 0: ret = "tex0"; break; 6024 case 1: ret = "tex1"; break; 6025 case 2: ret = "tex2"; break; 6026 case 3: ret = "tex3"; break; 6027 case 4: ret = "tex4"; break; 6028 case 5: ret = "tex5"; break; 6029 case 6: ret = "tex6"; break; 6030 case 7: ret = "tex7"; break; 6031 default: ret = "unknown texture"; 6032 } 6033 break; 6034 6035 case 
WINED3DTA_TFACTOR: 6036 ret = "tfactor"; break; 6037 6038 case WINED3DTA_SPECULAR: 6039 ret = "fragment.color.secondary"; break; 6040 6041 case WINED3DTA_TEMP: 6042 ret = "tempreg"; break; 6043 6044 case WINED3DTA_CONSTANT: 6045 switch(stage) { 6046 case 0: ret = "const0"; break; 6047 case 1: ret = "const1"; break; 6048 case 2: ret = "const2"; break; 6049 case 3: ret = "const3"; break; 6050 case 4: ret = "const4"; break; 6051 case 5: ret = "const5"; break; 6052 case 6: ret = "const6"; break; 6053 case 7: ret = "const7"; break; 6054 default: ret = "unknown constant"; 6055 } 6056 break; 6057 6058 default: 6059 return "unknown"; 6060 } 6061 6062 if(arg & WINED3DTA_COMPLEMENT) { 6063 shader_addline(buffer, "SUB arg%u, const.x, %s;\n", argnum, ret); 6064 if(argnum == 0) ret = "arg0"; 6065 if(argnum == 1) ret = "arg1"; 6066 if(argnum == 2) ret = "arg2"; 6067 } 6068 if(arg & WINED3DTA_ALPHAREPLICATE) { 6069 shader_addline(buffer, "MOV arg%u, %s.w;\n", argnum, ret); 6070 if(argnum == 0) ret = "arg0"; 6071 if(argnum == 1) ret = "arg1"; 6072 if(argnum == 2) ret = "arg2"; 6073 } 6074 return ret; 6075 } 6076 6077 static void gen_ffp_instr(struct wined3d_string_buffer *buffer, unsigned int stage, BOOL color, 6078 BOOL alpha, DWORD dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2) 6079 { 6080 const char *dstmask, *dstreg, *arg0, *arg1, *arg2; 6081 unsigned int mul = 1; 6082 6083 if(color && alpha) dstmask = ""; 6084 else if(color) dstmask = ".xyz"; 6085 else dstmask = ".w"; 6086 6087 if(dst == tempreg) dstreg = "tempreg"; 6088 else dstreg = "ret"; 6089 6090 arg0 = get_argreg(buffer, 0, stage, dw_arg0); 6091 arg1 = get_argreg(buffer, 1, stage, dw_arg1); 6092 arg2 = get_argreg(buffer, 2, stage, dw_arg2); 6093 6094 switch (op) 6095 { 6096 case WINED3D_TOP_DISABLE: 6097 break; 6098 6099 case WINED3D_TOP_SELECT_ARG2: 6100 arg1 = arg2; 6101 /* FALLTHROUGH */ 6102 case WINED3D_TOP_SELECT_ARG1: 6103 shader_addline(buffer, "MOV %s%s, %s;\n", dstreg, dstmask, arg1); 6104 break; 
6105 6106 case WINED3D_TOP_MODULATE_4X: 6107 mul = 2; 6108 /* FALLTHROUGH */ 6109 case WINED3D_TOP_MODULATE_2X: 6110 mul *= 2; 6111 /* FALLTHROUGH */ 6112 case WINED3D_TOP_MODULATE: 6113 shader_addline(buffer, "MUL %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 6114 break; 6115 6116 case WINED3D_TOP_ADD_SIGNED_2X: 6117 mul = 2; 6118 /* FALLTHROUGH */ 6119 case WINED3D_TOP_ADD_SIGNED: 6120 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 6121 arg2 = "arg2"; 6122 /* FALLTHROUGH */ 6123 case WINED3D_TOP_ADD: 6124 shader_addline(buffer, "ADD_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 6125 break; 6126 6127 case WINED3D_TOP_SUBTRACT: 6128 shader_addline(buffer, "SUB_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 6129 break; 6130 6131 case WINED3D_TOP_ADD_SMOOTH: 6132 shader_addline(buffer, "SUB arg1, const.x, %s;\n", arg1); 6133 shader_addline(buffer, "MAD_SAT %s%s, arg1, %s, %s;\n", dstreg, dstmask, arg2, arg1); 6134 break; 6135 6136 case WINED3D_TOP_BLEND_CURRENT_ALPHA: 6137 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_CURRENT); 6138 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6139 break; 6140 case WINED3D_TOP_BLEND_FACTOR_ALPHA: 6141 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TFACTOR); 6142 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6143 break; 6144 case WINED3D_TOP_BLEND_TEXTURE_ALPHA: 6145 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 6146 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6147 break; 6148 case WINED3D_TOP_BLEND_DIFFUSE_ALPHA: 6149 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_DIFFUSE); 6150 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6151 break; 6152 6153 case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM: 6154 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 6155 shader_addline(buffer, "SUB arg0.w, const.x, %s.w;\n", arg0); 6156 
shader_addline(buffer, "MAD_SAT %s%s, %s, arg0.w, %s;\n", dstreg, dstmask, arg2, arg1); 6157 break; 6158 6159 /* D3DTOP_PREMODULATE ???? */ 6160 6161 case WINED3D_TOP_MODULATE_INVALPHA_ADD_COLOR: 6162 shader_addline(buffer, "SUB arg0.w, const.x, %s;\n", arg1); 6163 shader_addline(buffer, "MAD_SAT %s%s, arg0.w, %s, %s;\n", dstreg, dstmask, arg2, arg1); 6164 break; 6165 case WINED3D_TOP_MODULATE_ALPHA_ADD_COLOR: 6166 shader_addline(buffer, "MAD_SAT %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg1); 6167 break; 6168 case WINED3D_TOP_MODULATE_INVCOLOR_ADD_ALPHA: 6169 shader_addline(buffer, "SUB arg0, const.x, %s;\n", arg1); 6170 shader_addline(buffer, "MAD_SAT %s%s, arg0, %s, %s.w;\n", dstreg, dstmask, arg2, arg1); 6171 break; 6172 case WINED3D_TOP_MODULATE_COLOR_ADD_ALPHA: 6173 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s.w;\n", dstreg, dstmask, arg1, arg2, arg1); 6174 break; 6175 6176 case WINED3D_TOP_DOTPRODUCT3: 6177 mul = 4; 6178 shader_addline(buffer, "SUB arg1, %s, const.w;\n", arg1); 6179 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 6180 shader_addline(buffer, "DP3_SAT %s%s, arg1, arg2;\n", dstreg, dstmask); 6181 break; 6182 6183 case WINED3D_TOP_MULTIPLY_ADD: 6184 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg0); 6185 break; 6186 6187 case WINED3D_TOP_LERP: 6188 /* The msdn is not quite right here */ 6189 shader_addline(buffer, "LRP %s%s, %s, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 6190 break; 6191 6192 case WINED3D_TOP_BUMPENVMAP: 6193 case WINED3D_TOP_BUMPENVMAP_LUMINANCE: 6194 /* Those are handled in the first pass of the shader(generation pass 1 and 2) already */ 6195 break; 6196 6197 default: 6198 FIXME("Unhandled texture op %08x\n", op); 6199 } 6200 6201 if (mul == 2) 6202 shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", dstreg, dstmask, dstreg); 6203 else if (mul == 4) 6204 shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", dstreg, dstmask, dstreg); 6205 } 6206 6207 
static const char *arbfp_texture_target(enum wined3d_gl_resource_type type) 6208 { 6209 switch(type) 6210 { 6211 case WINED3D_GL_RES_TYPE_TEX_1D: 6212 return "1D"; 6213 case WINED3D_GL_RES_TYPE_TEX_2D: 6214 return "2D"; 6215 case WINED3D_GL_RES_TYPE_TEX_3D: 6216 return "3D"; 6217 case WINED3D_GL_RES_TYPE_TEX_CUBE: 6218 return "CUBE"; 6219 case WINED3D_GL_RES_TYPE_TEX_RECT: 6220 return "RECT"; 6221 default: 6222 return "unexpected_resource_type"; 6223 } 6224 } 6225 6226 static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, const struct wined3d_gl_info *gl_info) 6227 { 6228 BYTE tex_read = 0, bump_used = 0, luminance_used = 0, constant_used = 0; 6229 BOOL tempreg_used = FALSE, tfactor_used = FALSE; 6230 unsigned int stage, lowest_disabled_stage; 6231 struct wined3d_string_buffer buffer; 6232 struct color_fixup_masks masks; 6233 BOOL custom_linear_fog = FALSE; 6234 const char *textype, *instr; 6235 DWORD arg0, arg1, arg2; 6236 char colorcor_dst[8]; 6237 BOOL op_equal; 6238 GLuint ret; 6239 6240 if (!string_buffer_init(&buffer)) 6241 { 6242 ERR("Failed to initialize shader buffer.\n"); 6243 return 0; 6244 } 6245 6246 shader_addline(&buffer, "!!ARBfp1.0\n"); 6247 6248 if (settings->color_key_enabled) 6249 { 6250 shader_addline(&buffer, "PARAM color_key_low = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_LOW); 6251 shader_addline(&buffer, "PARAM color_key_high = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_HIGH); 6252 tex_read |= 1; 6253 } 6254 6255 /* Find out which textures are read */ 6256 for (stage = 0; stage < MAX_TEXTURES; ++stage) 6257 { 6258 if (settings->op[stage].cop == WINED3D_TOP_DISABLE) 6259 break; 6260 6261 arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK; 6262 arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK; 6263 arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK; 6264 6265 if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE) 6266 tex_read |= 1u << stage; 6267 if 
(settings->op[stage].dst == tempreg) 6268 tempreg_used = TRUE; 6269 if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) 6270 tempreg_used = TRUE; 6271 if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) 6272 tfactor_used = TRUE; 6273 if (arg0 == WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT) 6274 constant_used |= 1u << stage; 6275 6276 switch (settings->op[stage].cop) 6277 { 6278 case WINED3D_TOP_BUMPENVMAP_LUMINANCE: 6279 luminance_used |= 1u << stage; 6280 /* fall through */ 6281 case WINED3D_TOP_BUMPENVMAP: 6282 bump_used |= 1u << stage; 6283 /* fall through */ 6284 case WINED3D_TOP_BLEND_TEXTURE_ALPHA: 6285 case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM: 6286 tex_read |= 1u << stage; 6287 break; 6288 6289 case WINED3D_TOP_BLEND_FACTOR_ALPHA: 6290 tfactor_used = TRUE; 6291 break; 6292 6293 default: 6294 break; 6295 } 6296 6297 if (settings->op[stage].aop == WINED3D_TOP_DISABLE) 6298 continue; 6299 6300 arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK; 6301 arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK; 6302 arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK; 6303 6304 if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE) 6305 tex_read |= 1u << stage; 6306 if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) 6307 tempreg_used = TRUE; 6308 if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) 6309 tfactor_used = TRUE; 6310 if (arg0 == WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT) 6311 constant_used |= 1u << stage; 6312 } 6313 lowest_disabled_stage = stage; 6314 6315 switch (settings->fog) 6316 { 6317 case WINED3D_FFP_PS_FOG_OFF: break; 6318 case WINED3D_FFP_PS_FOG_LINEAR: 6319 if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG) 6320 { 6321 custom_linear_fog = TRUE; 6322 break; 6323 } 6324 shader_addline(&buffer, "OPTION 
ARB_fog_linear;\n"); 6325 break; 6326 6327 case WINED3D_FFP_PS_FOG_EXP: shader_addline(&buffer, "OPTION ARB_fog_exp;\n"); break; 6328 case WINED3D_FFP_PS_FOG_EXP2: shader_addline(&buffer, "OPTION ARB_fog_exp2;\n"); break; 6329 default: FIXME("Unexpected fog setting %d\n", settings->fog); 6330 } 6331 6332 shader_addline(&buffer, "PARAM const = {1, 2, 4, 0.5};\n"); 6333 shader_addline(&buffer, "TEMP TMP;\n"); 6334 shader_addline(&buffer, "TEMP ret;\n"); 6335 if (tempreg_used || settings->sRGB_write) shader_addline(&buffer, "TEMP tempreg;\n"); 6336 shader_addline(&buffer, "TEMP arg0;\n"); 6337 shader_addline(&buffer, "TEMP arg1;\n"); 6338 shader_addline(&buffer, "TEMP arg2;\n"); 6339 for (stage = 0; stage < MAX_TEXTURES; ++stage) 6340 { 6341 if (constant_used & (1u << stage)) 6342 shader_addline(&buffer, "PARAM const%u = program.env[%u];\n", stage, ARB_FFP_CONST_CONSTANT(stage)); 6343 6344 if (!(tex_read & (1u << stage))) 6345 continue; 6346 6347 shader_addline(&buffer, "TEMP tex%u;\n", stage); 6348 6349 if (!(bump_used & (1u << stage))) 6350 continue; 6351 shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage)); 6352 6353 if (!(luminance_used & (1u << stage))) 6354 continue; 6355 shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage)); 6356 } 6357 if (tfactor_used) 6358 shader_addline(&buffer, "PARAM tfactor = program.env[%u];\n", ARB_FFP_CONST_TFACTOR); 6359 shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", ARB_FFP_CONST_SPECULAR_ENABLE); 6360 6361 if (settings->sRGB_write) 6362 { 6363 shader_addline(&buffer, "PARAM srgb_consts0 = "); 6364 shader_arb_append_imm_vec4(&buffer, wined3d_srgb_const0); 6365 shader_addline(&buffer, ";\n"); 6366 shader_addline(&buffer, "PARAM srgb_consts1 = "); 6367 shader_arb_append_imm_vec4(&buffer, wined3d_srgb_const1); 6368 shader_addline(&buffer, ";\n"); 6369 } 6370 6371 if (lowest_disabled_stage < 7 && 
settings->emul_clipplanes) 6372 shader_addline(&buffer, "KIL fragment.texcoord[7];\n"); 6373 6374 if (tempreg_used || settings->sRGB_write) 6375 shader_addline(&buffer, "MOV tempreg, 0.0;\n"); 6376 6377 /* Generate texture sampling instructions */ 6378 for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) 6379 { 6380 if (!(tex_read & (1u << stage))) 6381 continue; 6382 6383 textype = arbfp_texture_target(settings->op[stage].tex_type); 6384 6385 if(settings->op[stage].projected == proj_none) { 6386 instr = "TEX"; 6387 } else if(settings->op[stage].projected == proj_count4 || 6388 settings->op[stage].projected == proj_count3) { 6389 instr = "TXP"; 6390 } else { 6391 FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); 6392 instr = "TXP"; 6393 } 6394 6395 if (stage > 0 6396 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP 6397 || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) 6398 { 6399 shader_addline(&buffer, "SWZ arg1, bumpmat%u, x, z, 0, 0;\n", stage - 1); 6400 shader_addline(&buffer, "DP3 ret.x, arg1, tex%u;\n", stage - 1); 6401 shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1); 6402 shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1); 6403 6404 /* with projective textures, texbem only divides the static texture coord, not the displacement, 6405 * so multiply the displacement with the dividing parameter before passing it to TXP 6406 */ 6407 if (settings->op[stage].projected != proj_none) { 6408 if(settings->op[stage].projected == proj_count4) { 6409 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage); 6410 shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", stage, stage); 6411 } else { 6412 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage); 6413 shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", stage, stage); 
6414 } 6415 } else { 6416 shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage); 6417 } 6418 6419 shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n", 6420 instr, stage, stage, textype); 6421 if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) 6422 { 6423 shader_addline(&buffer, "MAD_SAT ret.x, tex%u.z, luminance%u.x, luminance%u.y;\n", 6424 stage - 1, stage - 1, stage - 1); 6425 shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage); 6426 } 6427 } else if(settings->op[stage].projected == proj_count3) { 6428 shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage); 6429 shader_addline(&buffer, "MOV ret.w, ret.z;\n"); 6430 shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n", 6431 instr, stage, stage, textype); 6432 } else { 6433 shader_addline(&buffer, "%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n", 6434 instr, stage, stage, stage, textype); 6435 } 6436 6437 sprintf(colorcor_dst, "tex%u", stage); 6438 masks = calc_color_correction(settings->op[stage].color_fixup, WINED3DSP_WRITEMASK_ALL); 6439 gen_color_correction(&buffer, colorcor_dst, colorcor_dst, "const.x", "const.y", 6440 settings->op[stage].color_fixup, masks); 6441 } 6442 6443 if (settings->color_key_enabled) 6444 { 6445 shader_addline(&buffer, "SLT TMP, tex0, color_key_low;\n"); /* below low key */ 6446 shader_addline(&buffer, "SGE ret, tex0, color_key_high;\n"); /* above high key */ 6447 shader_addline(&buffer, "ADD TMP, TMP, ret;\n"); /* or */ 6448 shader_addline(&buffer, "DP4 TMP.b, TMP, TMP;\n"); /* on any channel */ 6449 shader_addline(&buffer, "SGE TMP, -TMP.b, 0.0;\n"); /* logical not */ 6450 shader_addline(&buffer, "KIL -TMP;\n"); /* discard if true */ 6451 } 6452 6453 shader_addline(&buffer, "MOV ret, fragment.color.primary;\n"); 6454 6455 /* Generate the main shader */ 6456 for (stage = 0; stage < MAX_TEXTURES; ++stage) 6457 { 6458 if (settings->op[stage].cop == WINED3D_TOP_DISABLE) 6459 break; 6460 6461 if 
(settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1 6462 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1) 6463 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1; 6464 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1 6465 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2) 6466 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2; 6467 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2 6468 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1) 6469 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1; 6470 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2 6471 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2) 6472 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2; 6473 else 6474 op_equal = settings->op[stage].aop == settings->op[stage].cop 6475 && settings->op[stage].carg0 == settings->op[stage].aarg0 6476 && settings->op[stage].carg1 == settings->op[stage].aarg1 6477 && settings->op[stage].carg2 == settings->op[stage].aarg2; 6478 6479 if (settings->op[stage].aop == WINED3D_TOP_DISABLE) 6480 { 6481 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst, 6482 settings->op[stage].cop, settings->op[stage].carg0, 6483 settings->op[stage].carg1, settings->op[stage].carg2); 6484 } 6485 else if (op_equal) 6486 { 6487 gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].dst, 6488 settings->op[stage].cop, settings->op[stage].carg0, 6489 settings->op[stage].carg1, settings->op[stage].carg2); 6490 } 6491 else if (settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP 6492 && settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP_LUMINANCE) 6493 { 6494 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst, 6495 settings->op[stage].cop, settings->op[stage].carg0, 6496 settings->op[stage].carg1, settings->op[stage].carg2); 6497 gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].dst, 6498 settings->op[stage].aop, settings->op[stage].aarg0, 6499 
settings->op[stage].aarg1, settings->op[stage].aarg2); 6500 } 6501 } 6502 6503 if (settings->sRGB_write || custom_linear_fog) 6504 { 6505 shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, ret;\n"); 6506 if (settings->sRGB_write) 6507 arbfp_add_sRGB_correction(&buffer, "ret", "arg0", "arg1", "arg2", "tempreg", FALSE); 6508 if (custom_linear_fog) 6509 arbfp_add_linear_fog(&buffer, "ret", "arg0"); 6510 shader_addline(&buffer, "MOV result.color, ret;\n"); 6511 } 6512 else 6513 { 6514 shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, ret;\n"); 6515 } 6516 6517 /* Footer */ 6518 shader_addline(&buffer, "END\n"); 6519 6520 /* Generate the shader */ 6521 GL_EXTCALL(glGenProgramsARB(1, &ret)); 6522 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ret)); 6523 shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer); 6524 6525 string_buffer_free(&buffer); 6526 return ret; 6527 } 6528 6529 static void fragment_prog_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 6530 { 6531 const struct wined3d_gl_info *gl_info = context->gl_info; 6532 const struct wined3d_device *device = context->device; 6533 struct shader_arb_priv *priv = device->fragment_priv; 6534 BOOL use_pshader = use_ps(state); 6535 struct ffp_frag_settings settings; 6536 const struct arbfp_ffp_desc *desc; 6537 unsigned int i; 6538 6539 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 6540 6541 if (isStateDirty(context, STATE_RENDER(WINED3D_RS_FOGENABLE))) 6542 { 6543 if (!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) 6544 { 6545 /* Reload fixed function constants since they collide with the 6546 * pixel shader constants. 
*/ 6547 for (i = 0; i < MAX_TEXTURES; ++i) 6548 { 6549 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00)); 6550 state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT)); 6551 } 6552 state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR)); 6553 state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE)); 6554 color_key_arbfp(context, state, STATE_COLOR_KEY); 6555 } 6556 else if (use_pshader) 6557 { 6558 context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL; 6559 } 6560 return; 6561 } 6562 6563 if (!use_pshader) 6564 { 6565 /* Find or create a shader implementing the fixed function pipeline 6566 * settings, then activate it. */ 6567 gen_ffp_frag_op(context, state, &settings, FALSE); 6568 desc = (const struct arbfp_ffp_desc *)find_ffp_frag_shader(&priv->fragment_shaders, &settings); 6569 if (!desc) 6570 { 6571 struct arbfp_ffp_desc *new_desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*new_desc)); 6572 if (!new_desc) 6573 { 6574 ERR("Out of memory\n"); 6575 return; 6576 } 6577 6578 new_desc->parent.settings = settings; 6579 new_desc->shader = gen_arbfp_ffp_shader(&settings, gl_info); 6580 add_ffp_frag_shader(&priv->fragment_shaders, &new_desc->parent); 6581 TRACE("Allocated fixed function replacement shader descriptor %p\n", new_desc); 6582 desc = new_desc; 6583 } 6584 6585 /* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active (however, note the 6586 * comment above the shader_select call below). If e.g. GLSL is active, the shader_select call will 6587 * deactivate it. 
6588 */ 6589 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)); 6590 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)"); 6591 priv->current_fprogram_id = desc->shader; 6592 6593 if (device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) 6594 { 6595 /* Reload fixed function constants since they collide with the 6596 * pixel shader constants. */ 6597 for (i = 0; i < MAX_TEXTURES; ++i) 6598 { 6599 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00)); 6600 state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT)); 6601 } 6602 state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR)); 6603 state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE)); 6604 color_key_arbfp(context, state, STATE_COLOR_KEY); 6605 } 6606 context->last_was_pshader = FALSE; 6607 } 6608 else if (!context->last_was_pshader) 6609 { 6610 if (device->shader_backend == &arb_program_shader_backend) 6611 context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F; 6612 context->last_was_pshader = TRUE; 6613 } 6614 6615 context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL; 6616 } 6617 6618 /* We can't link the fog states to the fragment state directly since the 6619 * vertex pipeline links them to FOGENABLE. A different linking in different 6620 * pipeline parts can't be expressed in the combined state table, so we need 6621 * to handle that with a forwarding function. The other invisible side effect 6622 * is that changing the fog start and fog end (which links to FOGENABLE in 6623 * vertex) results in the fragment_prog_arbfp function being called because 6624 * FOGENABLE is dirty, which calls this function here. 
*/ 6625 static void state_arbfp_fog(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 6626 { 6627 enum fogsource new_source; 6628 DWORD fogstart = state->render_states[WINED3D_RS_FOGSTART]; 6629 DWORD fogend = state->render_states[WINED3D_RS_FOGEND]; 6630 6631 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 6632 6633 if (!isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL))) 6634 fragment_prog_arbfp(context, state, state_id); 6635 6636 if (!state->render_states[WINED3D_RS_FOGENABLE]) 6637 return; 6638 6639 if (state->render_states[WINED3D_RS_FOGTABLEMODE] == WINED3D_FOG_NONE) 6640 { 6641 if (use_vs(state)) 6642 { 6643 new_source = FOGSOURCE_VS; 6644 } 6645 else 6646 { 6647 if (state->render_states[WINED3D_RS_FOGVERTEXMODE] == WINED3D_FOG_NONE || context->last_was_rhw) 6648 new_source = FOGSOURCE_COORD; 6649 else 6650 new_source = FOGSOURCE_FFP; 6651 } 6652 } 6653 else 6654 { 6655 new_source = FOGSOURCE_FFP; 6656 } 6657 6658 if (new_source != context->fog_source || fogstart == fogend) 6659 { 6660 context->fog_source = new_source; 6661 state_fogstartend(context, state, STATE_RENDER(WINED3D_RS_FOGSTART)); 6662 } 6663 } 6664 6665 static void textransform(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 6666 { 6667 if (!isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL))) 6668 fragment_prog_arbfp(context, state, state_id); 6669 } 6670 6671 static const struct StateEntryTemplate arbfp_fragmentstate_template[] = 6672 { 6673 {STATE_RENDER(WINED3D_RS_TEXTUREFACTOR), { STATE_RENDER(WINED3D_RS_TEXTUREFACTOR), state_texfactor_arbfp }, WINED3D_GL_EXT_NONE }, 6674 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6675 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6676 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG2), { 
STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6677 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6678 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6679 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6680 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6681 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6682 {STATE_TEXTURESTAGE(0, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6683 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6684 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6685 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6686 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6687 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6688 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6689 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6690 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6691 {STATE_TEXTURESTAGE(1, 
WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6692 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6693 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6694 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6695 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6696 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6697 {STATE_TEXTURESTAGE(1, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6698 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6699 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6700 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6701 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6702 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6703 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6704 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6705 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6706 
{STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6707 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6708 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6709 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6710 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6711 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6712 {STATE_TEXTURESTAGE(2, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6713 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6714 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6715 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6716 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6717 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6718 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6719 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6720 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE 
}, 6721 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6722 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6723 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6724 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6725 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6726 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6727 {STATE_TEXTURESTAGE(3, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6728 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6729 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6730 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6731 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6732 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6733 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6734 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6735 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, 
WINED3D_GL_EXT_NONE }, 6736 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6737 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6738 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6739 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6740 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6741 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6742 {STATE_TEXTURESTAGE(4, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6743 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6744 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6745 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6746 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6747 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6748 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6749 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6750 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), 
NULL }, WINED3D_GL_EXT_NONE }, 6751 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6752 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6753 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6754 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6755 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6756 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6757 {STATE_TEXTURESTAGE(5, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6758 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6759 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6760 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6761 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6762 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6763 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6764 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6765 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG1), { 
STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6766 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6767 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6768 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6769 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6770 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6771 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6772 {STATE_TEXTURESTAGE(6, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6773 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6774 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6775 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6776 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6777 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6778 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6779 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6780 {STATE_TEXTURESTAGE(7, 
WINED3D_TSS_COLOR_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6781 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6782 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6783 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_OP), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6784 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG1), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6785 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG2), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6786 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG0), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6787 {STATE_TEXTURESTAGE(7, WINED3D_TSS_RESULT_ARG), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6788 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6789 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6790 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6791 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 6792 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6793 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6794 {STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), fragment_prog_arbfp }, WINED3D_GL_EXT_NONE }, 6795 
{STATE_RENDER(WINED3D_RS_ALPHAFUNC), { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6796 {STATE_RENDER(WINED3D_RS_ALPHAREF), { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6797 {STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE), alpha_test_arbfp }, WINED3D_GL_EXT_NONE }, 6798 {STATE_RENDER(WINED3D_RS_COLORKEYENABLE), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6799 {STATE_COLOR_KEY, { STATE_COLOR_KEY, color_key_arbfp }, WINED3D_GL_EXT_NONE }, 6800 {STATE_RENDER(WINED3D_RS_FOGENABLE), { STATE_RENDER(WINED3D_RS_FOGENABLE), state_arbfp_fog }, WINED3D_GL_EXT_NONE }, 6801 {STATE_RENDER(WINED3D_RS_FOGTABLEMODE), { STATE_RENDER(WINED3D_RS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6802 {STATE_RENDER(WINED3D_RS_FOGVERTEXMODE), { STATE_RENDER(WINED3D_RS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6803 {STATE_RENDER(WINED3D_RS_FOGSTART), { STATE_RENDER(WINED3D_RS_FOGSTART), state_fogstartend }, WINED3D_GL_EXT_NONE }, 6804 {STATE_RENDER(WINED3D_RS_FOGEND), { STATE_RENDER(WINED3D_RS_FOGSTART), NULL }, WINED3D_GL_EXT_NONE }, 6805 {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), { STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), state_srgbwrite }, ARB_FRAMEBUFFER_SRGB }, 6806 {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL), NULL }, WINED3D_GL_EXT_NONE }, 6807 {STATE_RENDER(WINED3D_RS_FOGCOLOR), { STATE_RENDER(WINED3D_RS_FOGCOLOR), state_fogcolor }, WINED3D_GL_EXT_NONE }, 6808 {STATE_RENDER(WINED3D_RS_FOGDENSITY), { STATE_RENDER(WINED3D_RS_FOGDENSITY), state_fogdensity }, WINED3D_GL_EXT_NONE }, 6809 {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6810 {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6811 
{STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6812 {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6813 {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6814 {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6815 {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6816 {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE }, 6817 {STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6818 {STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6819 {STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6820 {STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6821 {STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6822 {STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6823 {STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, 
WINED3D_GL_EXT_NONE }, 6824 {STATE_TEXTURESTAGE(7, WINED3D_TSS_CONSTANT), { STATE_TEXTURESTAGE(7, WINED3D_TSS_CONSTANT), state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE }, 6825 {STATE_RENDER(WINED3D_RS_SPECULARENABLE), { STATE_RENDER(WINED3D_RS_SPECULARENABLE), state_arb_specularenable}, WINED3D_GL_EXT_NONE }, 6826 {STATE_RENDER(WINED3D_RS_SHADEMODE), { STATE_RENDER(WINED3D_RS_SHADEMODE), state_shademode }, WINED3D_GL_EXT_NONE }, 6827 {0 /* Terminate */, { 0, 0 }, WINED3D_GL_EXT_NONE }, 6828 }; 6829 6830 static BOOL arbfp_alloc_context_data(struct wined3d_context *context) 6831 { 6832 return TRUE; 6833 } 6834 6835 static void arbfp_free_context_data(struct wined3d_context *context) 6836 { 6837 } 6838 6839 const struct fragment_pipeline arbfp_fragment_pipeline = { 6840 arbfp_enable, 6841 arbfp_get_caps, 6842 arbfp_get_emul_mask, 6843 arbfp_alloc, 6844 arbfp_free, 6845 arbfp_alloc_context_data, 6846 arbfp_free_context_data, 6847 shader_arb_color_fixup_supported, 6848 arbfp_fragmentstate_template, 6849 }; 6850 6851 struct arbfp_blit_type 6852 { 6853 enum complex_fixup fixup : 4; 6854 enum wined3d_gl_resource_type res_type : 3; 6855 DWORD use_color_key : 1; 6856 DWORD padding : 24; 6857 }; 6858 6859 struct arbfp_blit_desc 6860 { 6861 GLuint shader; 6862 struct arbfp_blit_type type; 6863 struct wine_rb_entry entry; 6864 }; 6865 6866 #define ARBFP_BLIT_PARAM_SIZE 0 6867 #define ARBFP_BLIT_PARAM_COLOR_KEY_LOW 1 6868 #define ARBFP_BLIT_PARAM_COLOR_KEY_HIGH 2 6869 6870 struct wined3d_arbfp_blitter 6871 { 6872 struct wined3d_blitter blitter; 6873 struct wine_rb_tree shaders; 6874 GLuint palette_texture; 6875 }; 6876 6877 static int arbfp_blit_type_compare(const void *key, const struct wine_rb_entry *entry) 6878 { 6879 const struct arbfp_blit_type *ka = key; 6880 const struct arbfp_blit_type *kb = &WINE_RB_ENTRY_VALUE(entry, const struct arbfp_blit_desc, entry)->type; 6881 6882 return memcmp(ka, kb, sizeof(*ka)); 6883 } 6884 6885 /* Context activation is done by the caller. 
*/ 6886 static void arbfp_free_blit_shader(struct wine_rb_entry *entry, void *ctx) 6887 { 6888 struct arbfp_blit_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry); 6889 const struct wined3d_gl_info *gl_info; 6890 struct wined3d_context *context; 6891 6892 context = ctx; 6893 gl_info = context->gl_info; 6894 6895 GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader)); 6896 checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)"); 6897 HeapFree(GetProcessHeap(), 0, entry_arb); 6898 } 6899 6900 /* Context activation is done by the caller. */ 6901 static void arbfp_blitter_destroy(struct wined3d_blitter *blitter, struct wined3d_context *context) 6902 { 6903 const struct wined3d_gl_info *gl_info = context->gl_info; 6904 struct wined3d_arbfp_blitter *arbfp_blitter; 6905 struct wined3d_blitter *next; 6906 6907 if ((next = blitter->next)) 6908 next->ops->blitter_destroy(next, context); 6909 6910 arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter); 6911 6912 wine_rb_destroy(&arbfp_blitter->shaders, arbfp_free_blit_shader, context); 6913 checkGLcall("Delete blit programs"); 6914 6915 if (arbfp_blitter->palette_texture) 6916 gl_info->gl_ops.gl.p_glDeleteTextures(1, &arbfp_blitter->palette_texture); 6917 6918 HeapFree(GetProcessHeap(), 0, arbfp_blitter); 6919 } 6920 6921 static BOOL gen_planar_yuv_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type, 6922 char *luminance) 6923 { 6924 char chroma; 6925 const char *tex, *texinstr = "TXP"; 6926 6927 if (type->fixup == COMPLEX_FIXUP_UYVY) 6928 { 6929 chroma = 'x'; 6930 *luminance = 'w'; 6931 } 6932 else 6933 { 6934 chroma = 'w'; 6935 *luminance = 'x'; 6936 } 6937 6938 tex = arbfp_texture_target(type->res_type); 6939 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_RECT) 6940 texinstr = "TEX"; 6941 6942 /* First we have to read the chroma values. This means we need at least two pixels(no filtering), 6943 * or 4 pixels(with filtering). 
To get the unmodified chromas, we have to rid ourselves of the 6944 * filtering when we sample the texture. 6945 * 6946 * These are the rules for reading the chroma: 6947 * 6948 * Even pixel: Cr 6949 * Even pixel: U 6950 * Odd pixel: V 6951 * 6952 * So we have to get the sampling x position in non-normalized coordinates in integers 6953 */ 6954 if (type->res_type != WINED3D_GL_RES_TYPE_TEX_RECT) 6955 { 6956 shader_addline(buffer, "MUL texcrd.xy, fragment.texcoord[0], size.x;\n"); 6957 shader_addline(buffer, "MOV texcrd.w, size.x;\n"); 6958 } 6959 else 6960 { 6961 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 6962 } 6963 /* We must not allow filtering between pixel x and x+1, this would mix U and V 6964 * Vertical filtering is ok. However, bear in mind that the pixel center is at 6965 * 0.5, so add 0.5. 6966 */ 6967 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 6968 shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n"); 6969 6970 /* Divide the x coordinate by 0.5 and get the fraction. This gives 0.25 and 0.75 for the 6971 * even and odd pixels respectively 6972 */ 6973 shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n"); 6974 shader_addline(buffer, "FRC texcrd2, texcrd2;\n"); 6975 6976 /* Sample Pixel 1 */ 6977 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 6978 6979 /* Put the value into either of the chroma values */ 6980 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 6981 shader_addline(buffer, "MUL chroma.x, luminance.%c, temp.x;\n", chroma); 6982 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 6983 shader_addline(buffer, "MUL chroma.y, luminance.%c, temp.x;\n", chroma); 6984 6985 /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample 6986 * the pixel right to the current one. Otherwise, sample the left pixel. 6987 * Bias and scale the SLT result to -1;1 and add it to the texcrd.x. 
6988 */ 6989 shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n"); 6990 shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n"); 6991 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 6992 6993 /* Put the value into the other chroma */ 6994 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 6995 shader_addline(buffer, "MAD chroma.y, luminance.%c, temp.x, chroma.y;\n", chroma); 6996 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 6997 shader_addline(buffer, "MAD chroma.x, luminance.%c, temp.x, chroma.x;\n", chroma); 6998 6999 /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of 7000 * the current one and lerp the two U and V values 7001 */ 7002 7003 /* This gives the correctly filtered luminance value */ 7004 shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex); 7005 7006 return TRUE; 7007 } 7008 7009 static BOOL gen_yv12_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type, 7010 char *luminance) 7011 { 7012 const char *tex; 7013 static const float yv12_coef[] 7014 = {2.0f / 3.0f, 1.0f / 6.0f, (2.0f / 3.0f) + (1.0f / 6.0f), 1.0f / 3.0f}; 7015 7016 tex = arbfp_texture_target(type->res_type); 7017 7018 /* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2) 7019 * V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective 7020 * bitdepth is 12 bits per pixel. Since the U and V planes have only half the 7021 * pitch of the luminance plane, the packing into the gl texture is a bit 7022 * unfortunate. 
If the whole texture is interpreted as luminance data it looks 7023 * approximately like this: 7024 * 7025 * +----------------------------------+---- 7026 * | | 7027 * | | 7028 * | | 7029 * | | 7030 * | | 2 7031 * | LUMINANCE | - 7032 * | | 3 7033 * | | 7034 * | | 7035 * | | 7036 * | | 7037 * +----------------+-----------------+---- 7038 * | | | 7039 * | V even rows | V odd rows | 7040 * | | | 1 7041 * +----------------+------------------ - 7042 * | | | 3 7043 * | U even rows | U odd rows | 7044 * | | | 7045 * +----------------+-----------------+---- 7046 * | | | 7047 * | 0.5 | 0.5 | 7048 * 7049 * So it appears as if there are 4 chroma images, but in fact the odd rows 7050 * in the chroma images are in the same row as the even ones. So it is 7051 * kinda tricky to read 7052 * 7053 * When reading from rectangle textures, keep in mind that the input y coordinates 7054 * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height 7055 */ 7056 shader_addline(buffer, "PARAM yv12_coef = "); 7057 shader_arb_append_imm_vec4(buffer, yv12_coef); 7058 shader_addline(buffer, ";\n"); 7059 7060 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7061 /* the chroma planes have only half the width */ 7062 shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n"); 7063 7064 /* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias 7065 * the coordinate. 
Also read the right side of the image when reading odd lines 7066 * 7067 * Don't forget to clamp the y values in into the range, otherwise we'll get filtering 7068 * bleeding 7069 */ 7070 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7071 { 7072 7073 shader_addline(buffer, "RCP chroma.w, size.y;\n"); 7074 7075 shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n"); 7076 7077 shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n"); 7078 shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n"); 7079 7080 /* Read odd lines from the right side(add size * 0.5 to the x coordinate */ 7081 shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 7082 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 7083 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 7084 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 7085 7086 /* clamp, keep the half pixel origin in mind */ 7087 shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, yv12_coef.x;\n"); 7088 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7089 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.z;\n"); 7090 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7091 } 7092 else 7093 { 7094 /* Read from [size - size+size/4] */ 7095 shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n"); 7096 shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n"); 7097 7098 /* Read odd lines from the right side(add size * 0.5 to the x coordinate */ 7099 shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 7100 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 7101 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 7102 shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n"); 7103 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 7104 7105 /* Make sure to read exactly from the pixel 
center */ 7106 shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n"); 7107 shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n"); 7108 7109 /* Clamp */ 7110 shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n"); 7111 shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n"); 7112 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7113 shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n"); 7114 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7115 } 7116 /* Read the texture, put the result into the output register */ 7117 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7118 shader_addline(buffer, "MOV chroma.x, temp.w;\n"); 7119 7120 /* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th 7121 * No need to clamp because we're just reusing the already clamped value from above 7122 */ 7123 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7124 shader_addline(buffer, "ADD texcrd.y, texcrd.y, yv12_coef.y;\n"); 7125 else 7126 shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, texcrd.y;\n"); 7127 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7128 shader_addline(buffer, "MOV chroma.y, temp.w;\n"); 7129 7130 /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate. 
7131 * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance 7132 * values due to filtering 7133 */ 7134 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7135 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7136 { 7137 /* Multiply the y coordinate by 2/3 and clamp it */ 7138 shader_addline(buffer, "MUL texcrd.y, texcrd.y, yv12_coef.x;\n"); 7139 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.x;\n"); 7140 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7141 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7142 } 7143 else 7144 { 7145 /* Reading from texture_rectangles is pretty straightforward, just use the unmodified 7146 * texture coordinate. It is still a good idea to clamp it though, since the opengl texture 7147 * is bigger 7148 */ 7149 shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n"); 7150 shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n"); 7151 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7152 } 7153 *luminance = 'a'; 7154 7155 return TRUE; 7156 } 7157 7158 static BOOL gen_nv12_read(struct wined3d_string_buffer *buffer, const struct arbfp_blit_type *type, 7159 char *luminance) 7160 { 7161 const char *tex; 7162 static const float nv12_coef[] 7163 = {2.0f / 3.0f, 1.0f / 3.0f, 1.0f, 1.0f}; 7164 7165 tex = arbfp_texture_target(type->res_type); 7166 7167 /* NV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2) 7168 * sized plane where each component is an UV pair. 
So the effective 7169 * bitdepth is 12 bits per pixel If the whole texture is interpreted as luminance 7170 * data it looks approximately like this: 7171 * 7172 * +----------------------------------+---- 7173 * | | 7174 * | | 7175 * | | 7176 * | | 7177 * | | 2 7178 * | LUMINANCE | - 7179 * | | 3 7180 * | | 7181 * | | 7182 * | | 7183 * | | 7184 * +----------------------------------+---- 7185 * |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV| 7186 * |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV| 7187 * | | 1 7188 * | | - 7189 * | | 3 7190 * | | 7191 * | | 7192 * +----------------------------------+---- 7193 * 7194 * When reading from rectangle textures, keep in mind that the input y coordinates 7195 * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height. */ 7196 7197 shader_addline(buffer, "PARAM nv12_coef = "); 7198 shader_arb_append_imm_vec4(buffer, nv12_coef); 7199 shader_addline(buffer, ";\n"); 7200 7201 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7202 /* We only have half the number of chroma pixels. */ 7203 shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n"); 7204 7205 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7206 { 7207 shader_addline(buffer, "RCP chroma.w, size.x;\n"); 7208 shader_addline(buffer, "RCP chroma.z, size.y;\n"); 7209 7210 shader_addline(buffer, "MAD texcrd.y, texcrd.y, nv12_coef.y, nv12_coef.x;\n"); 7211 7212 /* We must not allow filtering horizontally, this would mix U and V. 7213 * Vertical filtering is ok. However, bear in mind that the pixel center is at 7214 * 0.5, so add 0.5. */ 7215 7216 /* Convert to non-normalized coordinates so we can find the 7217 * individual pixel. */ 7218 shader_addline(buffer, "MUL texcrd.x, texcrd.x, size.x;\n"); 7219 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 7220 /* Multiply by 2 since chroma components are stored in UV pixel pairs, 7221 * add 0.5 to hit the center of the pixel. 
*/ 7222 shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n"); 7223 7224 /* Convert back to normalized coordinates. */ 7225 shader_addline(buffer, "MUL texcrd.x, texcrd.x, chroma.w;\n"); 7226 7227 /* Clamp, keep the half pixel origin in mind. */ 7228 shader_addline(buffer, "MAD temp.y, coef.y, chroma.z, nv12_coef.x;\n"); 7229 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7230 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.z, nv12_coef.z;\n"); 7231 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7232 } 7233 else 7234 { 7235 /* Read from [size - size+size/2] */ 7236 shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.y, size.y;\n"); 7237 7238 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 7239 /* Multiply by 2 since chroma components are stored in UV pixel pairs, 7240 * add 0.5 to hit the center of the pixel. */ 7241 shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n"); 7242 7243 /* Clamp */ 7244 shader_addline(buffer, "MAD temp.y, size.y, coef.y, size.y;\n"); 7245 shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n"); 7246 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7247 shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n"); 7248 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 7249 } 7250 /* Read the texture, put the result into the output register. */ 7251 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7252 shader_addline(buffer, "MOV chroma.y, temp.w;\n"); 7253 7254 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7255 { 7256 /* Add 1/size.x */ 7257 shader_addline(buffer, "ADD texcrd.x, texcrd.x, chroma.w;\n"); 7258 } 7259 else 7260 { 7261 /* Add 1 */ 7262 shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.x;\n"); 7263 } 7264 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 7265 shader_addline(buffer, "MOV chroma.x, temp.w;\n"); 7266 7267 /* Sample the luminance value. 
It is in the top 2/3rd of the texture, so scale the y coordinate. 7268 * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance 7269 * values due to filtering. */ 7270 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 7271 if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D) 7272 { 7273 /* Multiply the y coordinate by 2/3 and clamp it */ 7274 shader_addline(buffer, "MUL texcrd.y, texcrd.y, nv12_coef.x;\n"); 7275 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, nv12_coef.x;\n"); 7276 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 7277 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7278 } 7279 else 7280 { 7281 /* Reading from texture_rectangles is pretty straightforward, just use the unmodified 7282 * texture coordinate. It is still a good idea to clamp it though, since the opengl texture 7283 * is bigger 7284 */ 7285 shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n"); 7286 shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n"); 7287 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 7288 } 7289 *luminance = 'a'; 7290 7291 return TRUE; 7292 } 7293 7294 /* Context activation is done by the caller. */ 7295 static GLuint gen_p8_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type) 7296 { 7297 GLuint shader; 7298 struct wined3d_string_buffer buffer; 7299 const char *tex_target = arbfp_texture_target(type->res_type); 7300 7301 /* This should not happen because we only use this conversion for 7302 * present blits which don't use color keying. 
*/ 7303 if (type->use_color_key) 7304 FIXME("Implement P8 color keying.\n"); 7305 7306 /* Shader header */ 7307 if (!string_buffer_init(&buffer)) 7308 { 7309 ERR("Failed to initialize shader buffer.\n"); 7310 return 0; 7311 } 7312 7313 GL_EXTCALL(glGenProgramsARB(1, &shader)); 7314 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 7315 if (!shader) 7316 { 7317 string_buffer_free(&buffer); 7318 return 0; 7319 } 7320 7321 shader_addline(&buffer, "!!ARBfp1.0\n"); 7322 shader_addline(&buffer, "TEMP index;\n"); 7323 7324 /* { 255/256, 0.5/255*255/256, 0, 0 } */ 7325 shader_addline(&buffer, "PARAM constants = { 0.996, 0.00195, 0, 0 };\n"); 7326 7327 /* The alpha-component contains the palette index */ 7328 shader_addline(&buffer, "TEX index, fragment.texcoord[0], texture[0], %s;\n", tex_target); 7329 7330 /* Scale the index by 255/256 and add a bias of '0.5' in order to sample in the middle */ 7331 shader_addline(&buffer, "MAD index.a, index.a, constants.x, constants.y;\n"); 7332 7333 /* Use the alpha-component as an index in the palette to get the final color */ 7334 shader_addline(&buffer, "TEX result.color, index.a, texture[1], 1D;\n"); 7335 shader_addline(&buffer, "END\n"); 7336 7337 shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer); 7338 7339 string_buffer_free(&buffer); 7340 7341 return shader; 7342 } 7343 7344 /* Context activation is done by the caller. */ 7345 static void upload_palette(struct wined3d_arbfp_blitter *blitter, 7346 const struct wined3d_texture *texture, struct wined3d_context *context) 7347 { 7348 const struct wined3d_palette *palette = texture->swapchain ? 
texture->swapchain->palette : NULL; 7349 const struct wined3d_gl_info *gl_info = context->gl_info; 7350 7351 if (!blitter->palette_texture) 7352 gl_info->gl_ops.gl.p_glGenTextures(1, &blitter->palette_texture); 7353 7354 GL_EXTCALL(glActiveTexture(GL_TEXTURE1)); 7355 gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_1D, blitter->palette_texture); 7356 7357 gl_info->gl_ops.gl.p_glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); 7358 7359 gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 7360 /* Make sure we have discrete color levels. */ 7361 gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 7362 gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 7363 /* TODO: avoid unneeded uploads in the future by adding some SFLAG_PALETTE_DIRTY mechanism */ 7364 if (palette) 7365 { 7366 gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 256, 0, GL_BGRA, 7367 GL_UNSIGNED_INT_8_8_8_8_REV, palette->colors); 7368 } 7369 else 7370 { 7371 static const DWORD black; 7372 FIXME("P8 surface loaded without a palette.\n"); 7373 gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 1, 0, GL_BGRA, 7374 GL_UNSIGNED_INT_8_8_8_8_REV, &black); 7375 } 7376 7377 /* Switch back to unit 0 in which the 2D texture will be stored. */ 7378 context_active_texture(context, gl_info, 0); 7379 } 7380 7381 /* Context activation is done by the caller. 
*/
/* Create the ARB fragment program that converts YUV source data to RGB.
 *
 * The texture read code depends on type->fixup and is produced by
 * gen_planar_yuv_read(), gen_yv12_read() or gen_nv12_read(); the colour
 * conversion appended afterwards is shared by all formats.
 *
 * Returns the GL program name, or 0 if the string buffer could not be
 * initialised, no program name was generated, the fixup is not a supported
 * YUV fixup, or the read code could not be generated. On success the new
 * program is left bound to GL_FRAGMENT_PROGRAM_ARB. */
static GLuint gen_yuv_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
{
    /* Start from 0 so the check below is well defined even if
     * glGenProgramsARB() fails without writing a name. */
    GLuint shader = 0;
    struct wined3d_string_buffer buffer;
    char luminance_component;
    BOOL have_read_code;

    if (type->use_color_key)
        FIXME("Implement YUV color keying.\n");

    /* Shader header */
    if (!string_buffer_init(&buffer))
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

    GL_EXTCALL(glGenProgramsARB(1, &shader));
    checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");
    /* Check the name before binding it, as arbfp_gen_plain_shader() does. */
    if (!shader)
    {
        string_buffer_free(&buffer);
        return 0;
    }
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");

    /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel,
     * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and
     * two chroma(U and V) values. Each macropixel has two luminance values, one for
     * each single pixel it contains, and one U and one V value shared between both
     * pixels.
     *
     * The data is loaded into an A8L8 texture. With YUY2, the luminance component
     * contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus
     * take the format into account when generating the read swizzles
     *
     * Reading the Y value is straightforward - just sample the texture. The hardware
     * takes care of filtering in the horizontal and vertical direction.
     *
     * Reading the U and V values is harder. We have to avoid filtering horizontally,
     * because that would mix the U and V values of one pixel or two adjacent pixels.
     * Thus floor the texture coordinate and add 0.5 to get an unfiltered read,
     * regardless of the filtering setting. Vertical filtering works automatically
     * though - the U and V values of two rows are mixed nicely.
     *
     * Apart from avoiding filtering issues, the code has to know which value it just
     * read, and where it can find the other one. To determine this, it checks if
     * it sampled an even or odd pixel, and shifts the 2nd read accordingly.
     *
     * Handling horizontal filtering of U and V values requires reading a 2nd pair
     * of pixels, extracting U and V and mixing them. This is not implemented yet.
     *
     * An alternative implementation idea is to load the texture as A8R8G8B8 texture,
     * with width / 2. This way one read gives all 3 values, finding U and V is easy
     * in an unfiltered situation. Finding the luminance on the other hand requires
     * finding out if it is an odd or even pixel. The real drawback of this approach
     * is filtering. This would have to be emulated completely in the shader, reading
     * up to 2 packed pixels in up to 2 rows and interpolating both horizontally and
     * vertically. Beyond that it would require adjustments to the texture handling
     * code to deal with the width scaling
     */
    shader_addline(&buffer, "!!ARBfp1.0\n");
    shader_addline(&buffer, "TEMP luminance;\n");
    shader_addline(&buffer, "TEMP temp;\n");
    shader_addline(&buffer, "TEMP chroma;\n");
    shader_addline(&buffer, "TEMP texcrd;\n");
    shader_addline(&buffer, "TEMP texcrd2;\n");
    shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.25};\n");
    shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n");
    shader_addline(&buffer, "PARAM size = program.local[%u];\n", ARBFP_BLIT_PARAM_SIZE);

    /* Emit the format specific texture reads. The generators also report
     * which component of "luminance" holds the Y value. */
    switch (type->fixup)
    {
        case COMPLEX_FIXUP_UYVY:
        case COMPLEX_FIXUP_YUY2:
            have_read_code = gen_planar_yuv_read(&buffer, type, &luminance_component);
            break;

        case COMPLEX_FIXUP_YV12:
            have_read_code = gen_yv12_read(&buffer, type, &luminance_component);
            break;

        case COMPLEX_FIXUP_NV12:
            have_read_code = gen_nv12_read(&buffer, type, &luminance_component);
            break;

        default:
            FIXME("Unsupported YUV fixup %#x\n", type->fixup);
            have_read_code = FALSE;
            break;
    }

    if (!have_read_code)
    {
        /* The program name was already generated and bound above; release
         * it instead of leaking it. Delete first and then bind program 0,
         * the same sequence the error path of arbfp_blit_set() uses. */
        GL_EXTCALL(glDeleteProgramsARB(1, &shader));
        checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader))");
        GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0));
        checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)");
        string_buffer_free(&buffer);
        return 0;
    }

    /* Calculate the final result. Formula is taken from
     * http://www.fourcc.org/fccyvrgb.php. Note that the chroma
     * ranges from -0.5 to 0.5
     */
    shader_addline(&buffer, "SUB chroma.xy, chroma, coef.y;\n");

    shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "MAD temp.x, -chroma.y, yuv_coef.y, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "MAD result.color.y, -chroma.x, yuv_coef.z, temp.x;\n");
    shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "END\n");

    shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);

    string_buffer_free(&buffer);

    return shader;
}

/* Context activation is done by the caller.
*/
/* Create the ARB fragment program for blits that need no format conversion.
 *
 * Without colour keying the program just copies the texel sampled from
 * texture unit 0 to the output. With type->use_color_key set, the sampled
 * texel is compared against the two colour key program local parameters and
 * conditionally killed before being written.
 *
 * Returns the GL program name, or 0 if the string buffer could not be
 * initialised or no program name was generated. */
static GLuint arbfp_gen_plain_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
{
    const char *target = arbfp_texture_target(type->res_type);
    struct wined3d_string_buffer program;
    GLuint id;

    /* Shader header */
    if (!string_buffer_init(&program))
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

    GL_EXTCALL(glGenProgramsARB(1, &id));
    if (id)
    {
        GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, id));

        shader_addline(&program, "!!ARBfp1.0\n");

        if (!type->use_color_key)
        {
            /* Straight copy of the sampled texel. */
            shader_addline(&program, "TEX result.color, fragment.texcoord[0], texture[0], %s;\n", target);
        }
        else
        {
            shader_addline(&program, "TEMP color;\n");
            shader_addline(&program, "TEMP less, greater;\n");
            shader_addline(&program, "PARAM color_key_low = program.local[%u];\n",
                    ARBFP_BLIT_PARAM_COLOR_KEY_LOW);
            shader_addline(&program, "PARAM color_key_high = program.local[%u];\n",
                    ARBFP_BLIT_PARAM_COLOR_KEY_HIGH);
            shader_addline(&program, "TEX color, fragment.texcoord[0], texture[0], %s;\n", target);
            /* Per channel: 1.0 where the texel is below the low key. */
            shader_addline(&program, "SLT less, color, color_key_low;\n");
            /* Per channel: 1.0 where the texel is at or above the high key. */
            shader_addline(&program, "SGE greater, color, color_key_high;\n");
            /* Combine both comparisons (logical or). */
            shader_addline(&program, "ADD less, less, greater;\n");
            /* Collapse the four channels into a single value. */
            shader_addline(&program, "DP4 less.b, less, less;\n");
            /* Logical not of the collapsed value. */
            shader_addline(&program, "SGE less, -less.b, 0.0;\n");
            /* Discard the fragment if the result is set. */
            shader_addline(&program, "KIL -less;\n");
            shader_addline(&program, "MOV result.color, color;\n");
        }

        shader_addline(&program, "END\n");

        shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, program.buffer);
    }

    /* The buffer is released on both the success and the failure path; "id"
     * is 0 when no program name was generated. */
    string_buffer_free(&program);

    return id;
}

/* Context
activation is done by the caller. */ 7559 static HRESULT arbfp_blit_set(struct wined3d_arbfp_blitter *blitter, struct wined3d_context *context, 7560 const struct wined3d_surface *surface, const struct wined3d_color_key *color_key) 7561 { 7562 const struct wined3d_texture *texture = surface->container; 7563 enum complex_fixup fixup; 7564 const struct wined3d_gl_info *gl_info = context->gl_info; 7565 struct wine_rb_entry *entry; 7566 struct arbfp_blit_type type; 7567 struct arbfp_blit_desc *desc; 7568 struct wined3d_color float_color_key[2]; 7569 struct wined3d_vec4 size; 7570 GLuint shader; 7571 7572 size.x = wined3d_texture_get_level_pow2_width(texture, surface->texture_level); 7573 size.y = wined3d_texture_get_level_pow2_height(texture, surface->texture_level); 7574 size.z = 1.0f; 7575 size.w = 1.0f; 7576 7577 if (is_complex_fixup(texture->resource.format->color_fixup)) 7578 fixup = get_complex_fixup(texture->resource.format->color_fixup); 7579 else 7580 fixup = COMPLEX_FIXUP_NONE; 7581 7582 switch (texture->target) 7583 { 7584 case GL_TEXTURE_1D: 7585 type.res_type = WINED3D_GL_RES_TYPE_TEX_1D; 7586 break; 7587 7588 case GL_TEXTURE_2D: 7589 type.res_type = WINED3D_GL_RES_TYPE_TEX_2D; 7590 break; 7591 7592 case GL_TEXTURE_3D: 7593 type.res_type = WINED3D_GL_RES_TYPE_TEX_3D; 7594 break; 7595 7596 case GL_TEXTURE_CUBE_MAP_ARB: 7597 type.res_type = WINED3D_GL_RES_TYPE_TEX_CUBE; 7598 break; 7599 7600 case GL_TEXTURE_RECTANGLE_ARB: 7601 type.res_type = WINED3D_GL_RES_TYPE_TEX_RECT; 7602 break; 7603 7604 default: 7605 ERR("Unexpected GL texture type %#x.\n", texture->target); 7606 type.res_type = WINED3D_GL_RES_TYPE_TEX_2D; 7607 } 7608 type.fixup = fixup; 7609 type.use_color_key = !!color_key; 7610 type.padding = 0; 7611 7612 if ((entry = wine_rb_get(&blitter->shaders, &type))) 7613 { 7614 desc = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry); 7615 shader = desc->shader; 7616 } 7617 else 7618 { 7619 switch (fixup) 7620 { 7621 case COMPLEX_FIXUP_NONE: 7622 if 
(!is_identity_fixup(texture->resource.format->color_fixup)) 7623 FIXME("Implement support for sign or swizzle fixups.\n"); 7624 shader = arbfp_gen_plain_shader(gl_info, &type); 7625 break; 7626 7627 case COMPLEX_FIXUP_P8: 7628 shader = gen_p8_shader(gl_info, &type); 7629 break; 7630 7631 case COMPLEX_FIXUP_YUY2: 7632 case COMPLEX_FIXUP_UYVY: 7633 case COMPLEX_FIXUP_YV12: 7634 case COMPLEX_FIXUP_NV12: 7635 shader = gen_yuv_shader(gl_info, &type); 7636 break; 7637 } 7638 7639 if (!shader) 7640 { 7641 FIXME("Unsupported complex fixup %#x, not setting a shader\n", fixup); 7642 return E_NOTIMPL; 7643 } 7644 7645 desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*desc)); 7646 if (!desc) 7647 goto err_out; 7648 7649 desc->type = type; 7650 desc->shader = shader; 7651 if (wine_rb_put(&blitter->shaders, &desc->type, &desc->entry) == -1) 7652 { 7653 err_out: 7654 ERR("Out of memory\n"); 7655 GL_EXTCALL(glDeleteProgramsARB(1, &shader)); 7656 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader))"); 7657 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)); 7658 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)"); 7659 HeapFree(GetProcessHeap(), 0, desc); 7660 return E_OUTOFMEMORY; 7661 } 7662 } 7663 7664 if (fixup == COMPLEX_FIXUP_P8) 7665 upload_palette(blitter, texture, context); 7666 7667 gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB); 7668 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 7669 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 7670 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)"); 7671 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARBFP_BLIT_PARAM_SIZE, &size.x)); 7672 checkGLcall("glProgramLocalParameter4fvARB"); 7673 if (type.use_color_key) 7674 { 7675 wined3d_format_get_float_color_key(texture->resource.format, color_key, float_color_key); 7676 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 7677 ARBFP_BLIT_PARAM_COLOR_KEY_LOW, &float_color_key[0].r)); 7678 
GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 7679 ARBFP_BLIT_PARAM_COLOR_KEY_HIGH, &float_color_key[1].r)); 7680 checkGLcall("glProgramLocalParameter4fvARB"); 7681 } 7682 7683 return WINED3D_OK; 7684 } 7685 7686 /* Context activation is done by the caller. */ 7687 static void arbfp_blit_unset(const struct wined3d_gl_info *gl_info) 7688 { 7689 gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB); 7690 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 7691 } 7692 7693 static BOOL arbfp_blit_supported(const struct wined3d_gl_info *gl_info, 7694 const struct wined3d_d3d_info *d3d_info, enum wined3d_blit_op blit_op, 7695 enum wined3d_pool src_pool, const struct wined3d_format *src_format, DWORD src_location, 7696 enum wined3d_pool dst_pool, const struct wined3d_format *dst_format, DWORD dst_location) 7697 { 7698 enum complex_fixup src_fixup; 7699 BOOL decompress; 7700 7701 if (!gl_info->supported[ARB_FRAGMENT_PROGRAM]) 7702 return FALSE; 7703 7704 if (blit_op == WINED3D_BLIT_OP_RAW_BLIT && dst_format->id == src_format->id) 7705 { 7706 if (dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)) 7707 blit_op = WINED3D_BLIT_OP_DEPTH_BLIT; 7708 else 7709 blit_op = WINED3D_BLIT_OP_COLOR_BLIT; 7710 } 7711 7712 switch (blit_op) 7713 { 7714 case WINED3D_BLIT_OP_COLOR_BLIT_CKEY: 7715 if (!d3d_info->shader_color_key) 7716 { 7717 /* The conversion modifies the alpha channel so the color key might no longer match. 
*/ 7718 TRACE("Color keying not supported with converted textures.\n"); 7719 return FALSE; 7720 } 7721 case WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST: 7722 case WINED3D_BLIT_OP_COLOR_BLIT: 7723 break; 7724 7725 default: 7726 TRACE("Unsupported blit_op=%d\n", blit_op); 7727 return FALSE; 7728 } 7729 7730 decompress = src_format && (src_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED) 7731 && !(dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED); 7732 if (!decompress && (dst_pool == WINED3D_POOL_SYSTEM_MEM || src_pool == WINED3D_POOL_SYSTEM_MEM)) 7733 return FALSE; 7734 7735 src_fixup = get_complex_fixup(src_format->color_fixup); 7736 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d)) 7737 { 7738 TRACE("Checking support for fixup:\n"); 7739 dump_color_fixup_desc(src_format->color_fixup); 7740 } 7741 7742 if (!is_identity_fixup(dst_format->color_fixup) 7743 && (dst_format->id != src_format->id || dst_location != WINED3D_LOCATION_DRAWABLE)) 7744 { 7745 TRACE("Destination fixups are not supported\n"); 7746 return FALSE; 7747 } 7748 7749 if (is_identity_fixup(src_format->color_fixup)) 7750 { 7751 TRACE("[OK]\n"); 7752 return TRUE; 7753 } 7754 7755 /* We only support YUV conversions. 
*/ 7756 if (!is_complex_fixup(src_format->color_fixup)) 7757 { 7758 if (wined3d_settings.offscreen_rendering_mode == ORM_BACKBUFFER) 7759 { 7760 WARN("Claiming fixup support because of ORM_BACKBUFFER.\n"); 7761 return TRUE; 7762 } 7763 7764 TRACE("[FAILED]\n"); 7765 return FALSE; 7766 } 7767 7768 switch(src_fixup) 7769 { 7770 case COMPLEX_FIXUP_YUY2: 7771 case COMPLEX_FIXUP_UYVY: 7772 case COMPLEX_FIXUP_YV12: 7773 case COMPLEX_FIXUP_NV12: 7774 case COMPLEX_FIXUP_P8: 7775 TRACE("[OK]\n"); 7776 return TRUE; 7777 7778 default: 7779 FIXME("Unsupported YUV fixup %#x\n", src_fixup); 7780 TRACE("[FAILED]\n"); 7781 return FALSE; 7782 } 7783 } 7784 7785 static DWORD arbfp_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_blit_op op, 7786 struct wined3d_context *context, struct wined3d_surface *src_surface, DWORD src_location, 7787 const RECT *src_rect, struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect, 7788 const struct wined3d_color_key *color_key, enum wined3d_texture_filter_type filter) 7789 { 7790 struct wined3d_texture *src_texture = src_surface->container; 7791 struct wined3d_texture *dst_texture = dst_surface->container; 7792 struct wined3d_device *device = dst_texture->resource.device; 7793 struct wined3d_arbfp_blitter *arbfp_blitter; 7794 struct wined3d_color_key alpha_test_key; 7795 struct wined3d_blitter *next; 7796 RECT s, d; 7797 7798 if (!arbfp_blit_supported(&device->adapter->gl_info, &device->adapter->d3d_info, op, 7799 src_texture->resource.pool, src_texture->resource.format, src_location, 7800 dst_texture->resource.pool, dst_texture->resource.format, dst_location)) 7801 { 7802 if ((next = blitter->next)) 7803 return next->ops->blitter_blit(next, op, context, src_surface, src_location, 7804 src_rect, dst_surface, dst_location, dst_rect, color_key, filter); 7805 } 7806 7807 arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter); 7808 7809 /* Now load the surface */ 7810 if 
(wined3d_settings.offscreen_rendering_mode != ORM_FBO 7811 && (surface_get_sub_resource(src_surface)->locations 7812 & (WINED3D_LOCATION_TEXTURE_RGB | WINED3D_LOCATION_DRAWABLE)) 7813 == WINED3D_LOCATION_DRAWABLE 7814 && !wined3d_resource_is_offscreen(&src_texture->resource)) 7815 { 7816 /* Without FBO blits transferring from the drawable to the texture is 7817 * expensive, because we have to flip the data in sysmem. Since we can 7818 * flip in the blitter, we don't actually need that flip anyway. So we 7819 * use the surface's texture as scratch texture, and flip the source 7820 * rectangle instead. */ 7821 surface_load_fb_texture(src_surface, FALSE, context); 7822 7823 s = *src_rect; 7824 s.top = wined3d_texture_get_level_height(src_texture, src_surface->texture_level) - s.top; 7825 s.bottom = wined3d_texture_get_level_height(src_texture, src_surface->texture_level) - s.bottom; 7826 src_rect = &s; 7827 } 7828 else 7829 wined3d_texture_load(src_texture, context, FALSE); 7830 7831 context_apply_blit_state(context, device); 7832 7833 if (dst_location == WINED3D_LOCATION_DRAWABLE) 7834 { 7835 d = *dst_rect; 7836 surface_translate_drawable_coords(dst_surface, context->win_handle, &d); 7837 dst_rect = &d; 7838 } 7839 7840 if (wined3d_settings.offscreen_rendering_mode == ORM_FBO) 7841 { 7842 GLenum buffer; 7843 7844 if (dst_location == WINED3D_LOCATION_DRAWABLE) 7845 { 7846 TRACE("Destination surface %p is onscreen.\n", dst_surface); 7847 buffer = wined3d_texture_get_gl_buffer(dst_texture); 7848 } 7849 else 7850 { 7851 TRACE("Destination surface %p is offscreen.\n", dst_surface); 7852 buffer = GL_COLOR_ATTACHMENT0; 7853 } 7854 context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location); 7855 context_set_draw_buffer(context, buffer); 7856 context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER); 7857 context_invalidate_state(context, STATE_FRAMEBUFFER); 7858 } 7859 7860 if (op == WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST) 7861 { 7862 const struct 
wined3d_format *fmt = src_texture->resource.format; 7863 alpha_test_key.color_space_low_value = 0; 7864 alpha_test_key.color_space_high_value = ~(((1u << fmt->alpha_size) - 1) << fmt->alpha_offset); 7865 color_key = &alpha_test_key; 7866 } 7867 7868 arbfp_blit_set(arbfp_blitter, context, src_surface, color_key); 7869 7870 /* Draw a textured quad */ 7871 draw_textured_quad(src_surface, context, src_rect, dst_rect, filter); 7872 7873 /* Leave the opengl state valid for blitting */ 7874 arbfp_blit_unset(context->gl_info); 7875 7876 if (wined3d_settings.strict_draw_ordering 7877 || (dst_texture->swapchain && (dst_texture->swapchain->front_buffer == dst_texture))) 7878 context->gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */ 7879 7880 return dst_location; 7881 } 7882 7883 static void arbfp_blitter_clear(struct wined3d_blitter *blitter, struct wined3d_device *device, 7884 unsigned int rt_count, const struct wined3d_fb_state *fb, unsigned int rect_count, const RECT *clear_rects, 7885 const RECT *draw_rect, DWORD flags, const struct wined3d_color *colour, float depth, DWORD stencil) 7886 { 7887 struct wined3d_blitter *next; 7888 7889 if ((next = blitter->next)) 7890 next->ops->blitter_clear(next, device, rt_count, fb, rect_count, 7891 clear_rects, draw_rect, flags, colour, depth, stencil); 7892 } 7893 7894 static const struct wined3d_blitter_ops arbfp_blitter_ops = 7895 { 7896 arbfp_blitter_destroy, 7897 arbfp_blitter_clear, 7898 arbfp_blitter_blit, 7899 }; 7900 7901 void wined3d_arbfp_blitter_create(struct wined3d_blitter **next, const struct wined3d_device *device) 7902 { 7903 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 7904 struct wined3d_arbfp_blitter *blitter; 7905 7906 if (device->shader_backend != &arb_program_shader_backend 7907 && device->shader_backend != &glsl_shader_backend) 7908 return; 7909 7910 if (!gl_info->supported[ARB_FRAGMENT_PROGRAM]) 7911 return; 7912 7913 if 
(!gl_info->supported[WINED3D_GL_LEGACY_CONTEXT]) 7914 return; 7915 7916 if (!(blitter = HeapAlloc(GetProcessHeap(), 0, sizeof(*blitter)))) 7917 { 7918 ERR("Failed to allocate blitter.\n"); 7919 return; 7920 } 7921 7922 TRACE("Created blitter %p.\n", blitter); 7923 7924 blitter->blitter.ops = &arbfp_blitter_ops; 7925 blitter->blitter.next = *next; 7926 wine_rb_init(&blitter->shaders, arbfp_blit_type_compare); 7927 blitter->palette_texture = 0; 7928 *next = &blitter->blitter; 7929 } 7930