1 /* 2 * Pixel and vertex shaders implementation using ARB_vertex_program 3 * and ARB_fragment_program GL extensions. 4 * 5 * Copyright 2002-2003 Jason Edmeades 6 * Copyright 2002-2003 Raphael Junqueira 7 * Copyright 2004 Christian Costa 8 * Copyright 2005 Oliver Stieber 9 * Copyright 2006 Ivan Gyurdiev 10 * Copyright 2006 Jason Green 11 * Copyright 2006 Henri Verbeet 12 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers 13 * Copyright 2009 Henri Verbeet for CodeWeavers 14 * 15 * This library is free software; you can redistribute it and/or 16 * modify it under the terms of the GNU Lesser General Public 17 * License as published by the Free Software Foundation; either 18 * version 2.1 of the License, or (at your option) any later version. 19 * 20 * This library is distributed in the hope that it will be useful, 21 * but WITHOUT ANY WARRANTY; without even the implied warranty of 22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 * Lesser General Public License for more details. 24 * 25 * You should have received a copy of the GNU Lesser General Public 26 * License along with this library; if not, write to the Free Software 27 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 28 */ 29 30 #include "config.h" 31 32 #include <math.h> 33 #include <stdio.h> 34 35 #include "wined3d_private.h" 36 37 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader); 38 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants); 39 WINE_DECLARE_DEBUG_CHANNEL(d3d_caps); 40 WINE_DECLARE_DEBUG_CHANNEL(d3d); 41 42 /* Extract a line. Note that this modifies the source string. 
*/ 43 static char *get_line(char **ptr) 44 { 45 char *p, *q; 46 47 p = *ptr; 48 if (!(q = strstr(p, "\n"))) 49 { 50 if (!*p) return NULL; 51 *ptr += strlen(p); 52 return p; 53 } 54 *q = '\0'; 55 *ptr = q + 1; 56 57 return p; 58 } 59 60 static void shader_arb_dump_program_source(const char *source) 61 { 62 ULONG source_size; 63 char *ptr, *line, *tmp; 64 65 source_size = strlen(source) + 1; 66 tmp = HeapAlloc(GetProcessHeap(), 0, source_size); 67 if (!tmp) 68 { 69 ERR("Failed to allocate %u bytes for shader source.\n", source_size); 70 return; 71 } 72 memcpy(tmp, source, source_size); 73 74 ptr = tmp; 75 while ((line = get_line(&ptr))) FIXME(" %s\n", line); 76 FIXME("\n"); 77 78 HeapFree(GetProcessHeap(), 0, tmp); 79 } 80 81 /* GL locking for state handlers is done by the caller. */ 82 static BOOL need_rel_addr_const(IWineD3DBaseShaderImpl *shader, const struct wined3d_gl_info *gl_info) 83 { 84 if (shader->baseShader.reg_maps.shader_version.type == WINED3D_SHADER_TYPE_VERTEX) 85 { 86 if (((IWineD3DVertexShaderImpl *)shader)->rel_offset) return TRUE; 87 } 88 if (!shader->baseShader.reg_maps.usesmova) return FALSE; 89 return !gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]; 90 } 91 92 /* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */ 93 static inline BOOL use_nv_clip(const struct wined3d_gl_info *gl_info) 94 { 95 return gl_info->supported[NV_VERTEX_PROGRAM2_OPTION] 96 && !(gl_info->quirks & WINED3D_QUIRK_NV_CLIP_BROKEN); 97 } 98 99 static BOOL need_helper_const(IWineD3DBaseShaderImpl *shader, const struct wined3d_gl_info *gl_info) 100 { 101 if (need_rel_addr_const(shader, gl_info)) return TRUE; 102 if (!gl_info->supported[NV_VERTEX_PROGRAM]) return TRUE; /* Need to init colors. */ 103 if (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) return TRUE; /* Load the immval offset. */ 104 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) return TRUE; /* Have to init texcoords. 
*/ 105 if (!use_nv_clip(gl_info)) return TRUE; /* Init the clip texcoord */ 106 if (shader->baseShader.reg_maps.usesnrm) return TRUE; /* 0.0 */ 107 if (shader->baseShader.reg_maps.usesrcp) return TRUE; /* EPS */ 108 return FALSE; 109 } 110 111 static unsigned int reserved_vs_const(IWineD3DBaseShaderImpl *shader, const struct wined3d_gl_info *gl_info) 112 { 113 unsigned int ret = 1; 114 /* We use one PARAM for the pos fixup, and in some cases one to load 115 * some immediate values into the shader 116 */ 117 if(need_helper_const(shader, gl_info)) ret++; 118 if(need_rel_addr_const(shader, gl_info)) ret++; 119 return ret; 120 } 121 122 enum arb_helper_value 123 { 124 ARB_ZERO, 125 ARB_ONE, 126 ARB_TWO, 127 ARB_0001, 128 ARB_EPS, 129 130 ARB_VS_REL_OFFSET 131 }; 132 133 static const char *arb_get_helper_value(enum wined3d_shader_type shader, enum arb_helper_value value) 134 { 135 if (shader == WINED3D_SHADER_TYPE_GEOMETRY) 136 { 137 ERR("Geometry shaders are unsupported\n"); 138 return "bad"; 139 } 140 141 if (shader == WINED3D_SHADER_TYPE_PIXEL) 142 { 143 switch (value) 144 { 145 case ARB_ZERO: return "ps_helper_const.x"; 146 case ARB_ONE: return "ps_helper_const.y"; 147 case ARB_TWO: return "coefmul.x"; 148 case ARB_0001: return "helper_const.xxxy"; 149 case ARB_EPS: return "ps_helper_const.z"; 150 default: break; 151 } 152 } 153 else 154 { 155 switch (value) 156 { 157 case ARB_ZERO: return "helper_const.x"; 158 case ARB_ONE: return "helper_const.y"; 159 case ARB_TWO: return "helper_const.z"; 160 case ARB_EPS: return "helper_const.w"; 161 case ARB_0001: return "helper_const.xxxy"; 162 case ARB_VS_REL_OFFSET: return "rel_addr_const.y"; 163 } 164 } 165 FIXME("Unmanaged %s shader helper constant requested: %u\n", 166 shader == WINED3D_SHADER_TYPE_PIXEL ? 
"pixel" : "vertex", value); 167 switch (value) 168 { 169 case ARB_ZERO: return "0.0"; 170 case ARB_ONE: return "1.0"; 171 case ARB_TWO: return "2.0"; 172 case ARB_0001: return "{0.0, 0.0, 0.0, 1.0}"; 173 case ARB_EPS: return "1e-8"; 174 default: return "bad"; 175 } 176 } 177 178 static inline BOOL ffp_clip_emul(const struct wined3d_state *state) 179 { 180 return state->lowest_disabled_stage < 7; 181 } 182 183 /* ARB_program_shader private data */ 184 185 struct control_frame 186 { 187 struct list entry; 188 enum 189 { 190 IF, 191 IFC, 192 LOOP, 193 REP 194 } type; 195 BOOL muting; 196 BOOL outer_loop; 197 union 198 { 199 unsigned int loop; 200 unsigned int ifc; 201 } no; 202 struct wined3d_shader_loop_control loop_control; 203 BOOL had_else; 204 }; 205 206 struct arb_ps_np2fixup_info 207 { 208 struct ps_np2fixup_info super; 209 /* For ARB we need a offset value: 210 * With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) are stored in a 211 * consecutive way (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone" 212 * array we need an offset to the index inside the program local parameter array. 
*/ 213 UINT offset; 214 }; 215 216 struct arb_ps_compile_args 217 { 218 struct ps_compile_args super; 219 WORD bools; 220 WORD clip; /* only a boolean, use a WORD for alignment */ 221 unsigned char loop_ctrl[MAX_CONST_I][3]; 222 }; 223 224 struct stb_const_desc 225 { 226 unsigned char texunit; 227 UINT const_num; 228 }; 229 230 struct arb_ps_compiled_shader 231 { 232 struct arb_ps_compile_args args; 233 struct arb_ps_np2fixup_info np2fixup_info; 234 struct stb_const_desc bumpenvmatconst[MAX_TEXTURES]; 235 struct stb_const_desc luminanceconst[MAX_TEXTURES]; 236 UINT int_consts[MAX_CONST_I]; 237 GLuint prgId; 238 UINT ycorrection; 239 unsigned char numbumpenvmatconsts; 240 char num_int_consts; 241 }; 242 243 struct arb_vs_compile_args 244 { 245 struct vs_compile_args super; 246 union 247 { 248 struct 249 { 250 WORD bools; 251 unsigned char clip_texcoord; 252 unsigned char clipplane_mask; 253 } boolclip; 254 DWORD boolclip_compare; 255 } clip; 256 DWORD ps_signature; 257 union 258 { 259 unsigned char samplers[4]; 260 DWORD samplers_compare; 261 } vertex; 262 unsigned char loop_ctrl[MAX_CONST_I][3]; 263 }; 264 265 struct arb_vs_compiled_shader 266 { 267 struct arb_vs_compile_args args; 268 GLuint prgId; 269 UINT int_consts[MAX_CONST_I]; 270 char num_int_consts; 271 char need_color_unclamp; 272 UINT pos_fixup; 273 }; 274 275 struct recorded_instruction 276 { 277 struct wined3d_shader_instruction ins; 278 struct list entry; 279 }; 280 281 struct shader_arb_ctx_priv 282 { 283 char addr_reg[20]; 284 enum 285 { 286 /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */ 287 ARB, 288 /* GL_NV_vertex_progam2_option or GL_NV_fragment_program_option */ 289 NV2, 290 /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */ 291 NV3 292 } target_version; 293 294 const struct arb_vs_compile_args *cur_vs_args; 295 const struct arb_ps_compile_args *cur_ps_args; 296 const struct arb_ps_compiled_shader *compiled_fprog; 297 const struct arb_vs_compiled_shader *compiled_vprog; 298 
struct arb_ps_np2fixup_info *cur_np2fixup_info; 299 struct list control_frames; 300 struct list record; 301 BOOL recording; 302 BOOL muted; 303 unsigned int num_loops, loop_depth, num_ifcs; 304 int aL; 305 306 unsigned int vs_clipplanes; 307 BOOL footer_written; 308 BOOL in_main_func; 309 310 /* For 3.0 vertex shaders */ 311 const char *vs_output[MAX_REG_OUTPUT]; 312 /* For 2.x and earlier vertex shaders */ 313 const char *texcrd_output[8], *color_output[2], *fog_output; 314 315 /* 3.0 pshader input for compatibility with fixed function */ 316 const char *ps_input[MAX_REG_INPUT]; 317 }; 318 319 struct ps_signature 320 { 321 struct wined3d_shader_signature_element *sig; 322 DWORD idx; 323 struct wine_rb_entry entry; 324 }; 325 326 struct arb_pshader_private { 327 struct arb_ps_compiled_shader *gl_shaders; 328 UINT num_gl_shaders, shader_array_size; 329 BOOL has_signature_idx; 330 DWORD input_signature_idx; 331 DWORD clipplane_emulation; 332 BOOL clamp_consts; 333 }; 334 335 struct arb_vshader_private { 336 struct arb_vs_compiled_shader *gl_shaders; 337 UINT num_gl_shaders, shader_array_size; 338 }; 339 340 struct shader_arb_priv 341 { 342 GLuint current_vprogram_id; 343 GLuint current_fprogram_id; 344 const struct arb_ps_compiled_shader *compiled_fprog; 345 const struct arb_vs_compiled_shader *compiled_vprog; 346 GLuint depth_blt_vprogram_id; 347 GLuint depth_blt_fprogram_id_full[tex_type_count]; 348 GLuint depth_blt_fprogram_id_masked[tex_type_count]; 349 BOOL use_arbfp_fixed_func; 350 struct wine_rb_tree fragment_shaders; 351 BOOL last_ps_const_clamped; 352 BOOL last_vs_color_unclamp; 353 354 struct wine_rb_tree signature_tree; 355 DWORD ps_sig_number; 356 }; 357 358 /******************************************************** 359 * ARB_[vertex/fragment]_program helper functions follow 360 ********************************************************/ 361 362 /* Loads floating point constants into the currently set ARB_vertex/fragment_program. 
363 * When constant_list == NULL, it will load all the constants. 364 * 365 * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders) 366 * or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders) 367 */ 368 /* GL locking is done by the caller */ 369 static unsigned int shader_arb_load_constantsF(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info, 370 GLuint target_type, unsigned int max_constants, const float *constants, char *dirty_consts) 371 { 372 local_constant* lconst; 373 DWORD i, j; 374 unsigned int ret; 375 376 if (TRACE_ON(d3d_constants)) 377 { 378 for(i = 0; i < max_constants; i++) { 379 if(!dirty_consts[i]) continue; 380 TRACE_(d3d_constants)("Loading constants %i: %f, %f, %f, %f\n", i, 381 constants[i * 4 + 0], constants[i * 4 + 1], 382 constants[i * 4 + 2], constants[i * 4 + 3]); 383 } 384 } 385 386 i = 0; 387 388 /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */ 389 if (target_type == GL_FRAGMENT_PROGRAM_ARB && This->baseShader.reg_maps.shader_version.major == 1) 390 { 391 float lcl_const[4]; 392 /* ps 1.x supports only 8 constants, clamp only those. 
When switching between 1.x and higher 393 * shaders, the first 8 constants are marked dirty for reload 394 */ 395 for(; i < min(8, max_constants); i++) { 396 if(!dirty_consts[i]) continue; 397 dirty_consts[i] = 0; 398 399 j = 4 * i; 400 if (constants[j + 0] > 1.0f) lcl_const[0] = 1.0f; 401 else if (constants[j + 0] < -1.0f) lcl_const[0] = -1.0f; 402 else lcl_const[0] = constants[j + 0]; 403 404 if (constants[j + 1] > 1.0f) lcl_const[1] = 1.0f; 405 else if (constants[j + 1] < -1.0f) lcl_const[1] = -1.0f; 406 else lcl_const[1] = constants[j + 1]; 407 408 if (constants[j + 2] > 1.0f) lcl_const[2] = 1.0f; 409 else if (constants[j + 2] < -1.0f) lcl_const[2] = -1.0f; 410 else lcl_const[2] = constants[j + 2]; 411 412 if (constants[j + 3] > 1.0f) lcl_const[3] = 1.0f; 413 else if (constants[j + 3] < -1.0f) lcl_const[3] = -1.0f; 414 else lcl_const[3] = constants[j + 3]; 415 416 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const)); 417 } 418 419 /* If further constants are dirty, reload them without clamping. 420 * 421 * The alternative is not to touch them, but then we cannot reset the dirty constant count 422 * to zero. 
That's bad for apps that only use PS 1.x shaders, because in that case the code 423 * above would always re-check the first 8 constants since max_constant remains at the init 424 * value 425 */ 426 } 427 428 if (gl_info->supported[EXT_GPU_PROGRAM_PARAMETERS]) 429 { 430 /* TODO: Benchmark if we're better of with finding the dirty constants ourselves, 431 * or just reloading *all* constants at once 432 * 433 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4))); 434 */ 435 for(; i < max_constants; i++) { 436 if(!dirty_consts[i]) continue; 437 438 /* Find the next block of dirty constants */ 439 dirty_consts[i] = 0; 440 j = i; 441 for(i++; (i < max_constants) && dirty_consts[i]; i++) { 442 dirty_consts[i] = 0; 443 } 444 445 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, constants + (j * 4))); 446 } 447 } else { 448 for(; i < max_constants; i++) { 449 if(dirty_consts[i]) { 450 dirty_consts[i] = 0; 451 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, constants + (i * 4))); 452 } 453 } 454 } 455 checkGLcall("glProgramEnvParameter4fvARB()"); 456 457 /* Load immediate constants */ 458 if(This->baseShader.load_local_constsF) { 459 if (TRACE_ON(d3d_shader)) { 460 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) { 461 GLfloat* values = (GLfloat*)lconst->value; 462 TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx, 463 values[0], values[1], values[2], values[3]); 464 } 465 } 466 /* Immediate constants are clamped for 1.X shaders at loading times */ 467 ret = 0; 468 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) { 469 dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */ 470 ret = max(ret, lconst->idx + 1); 471 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value)); 472 } 473 checkGLcall("glProgramEnvParameter4fvARB()"); 474 return ret; /* 
The loaded immediate constants need reloading for the next shader */ 475 } else { 476 return 0; /* No constants are dirty now */ 477 } 478 } 479 480 /** 481 * Loads the texture dimensions for NP2 fixup into the currently set ARB_[vertex/fragment]_programs. 482 */ 483 static void shader_arb_load_np2fixup_constants( 484 IWineD3DDevice* device, 485 char usePixelShader, 486 char useVertexShader) { 487 488 IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl *) device; 489 const struct shader_arb_priv* const priv = (const struct shader_arb_priv *) deviceImpl->shader_priv; 490 IWineD3DStateBlockImpl* stateBlock = deviceImpl->stateBlock; 491 const struct wined3d_gl_info *gl_info = &deviceImpl->adapter->gl_info; 492 493 if (!usePixelShader) { 494 /* NP2 texcoord fixup is (currently) only done for pixelshaders. */ 495 return; 496 } 497 498 if (priv->compiled_fprog && priv->compiled_fprog->np2fixup_info.super.active) { 499 const struct arb_ps_np2fixup_info* const fixup = &priv->compiled_fprog->np2fixup_info; 500 UINT i; 501 WORD active = fixup->super.active; 502 GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS]; 503 504 for (i = 0; active; active >>= 1, ++i) { 505 const unsigned char idx = fixup->super.idx[i]; 506 const IWineD3DBaseTextureImpl *tex = stateBlock->state.textures[i]; 507 GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4]; 508 509 if (!(active & 1)) continue; 510 511 if (!tex) { 512 FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n"); 513 continue; 514 } 515 516 if (idx % 2) { 517 tex_dim[2] = tex->baseTexture.pow2Matrix[0]; tex_dim[3] = tex->baseTexture.pow2Matrix[5]; 518 } else { 519 tex_dim[0] = tex->baseTexture.pow2Matrix[0]; tex_dim[1] = tex->baseTexture.pow2Matrix[5]; 520 } 521 } 522 523 for (i = 0; i < fixup->super.num_consts; ++i) { 524 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 525 fixup->offset + i, &np2fixup_constants[i * 4])); 526 } 527 } 528 } 529 530 /* GL locking is done by the caller. 
*/ 531 static inline void shader_arb_ps_local_constants(IWineD3DDeviceImpl* deviceImpl) 532 { 533 const struct wined3d_context *context = context_get_current(); 534 IWineD3DStateBlockImpl* stateBlock = deviceImpl->stateBlock; 535 const struct wined3d_gl_info *gl_info = context->gl_info; 536 unsigned char i; 537 struct shader_arb_priv *priv = deviceImpl->shader_priv; 538 const struct arb_ps_compiled_shader *gl_shader = priv->compiled_fprog; 539 540 for(i = 0; i < gl_shader->numbumpenvmatconsts; i++) 541 { 542 int texunit = gl_shader->bumpenvmatconst[i].texunit; 543 544 /* The state manager takes care that this function is always called if the bump env matrix changes */ 545 const float *data = (const float *)&stateBlock->state.texture_states[texunit][WINED3DTSS_BUMPENVMAT00]; 546 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 547 gl_shader->bumpenvmatconst[i].const_num, data)); 548 549 if (gl_shader->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED) 550 { 551 /* WINED3DTSS_BUMPENVLSCALE and WINED3DTSS_BUMPENVLOFFSET are next to each other. 552 * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we 553 * don't care about them. The pointers are valid for sure because the stateblock is bigger. 554 * (they're WINED3DTSS_TEXTURETRANSFORMFLAGS and WINED3DTSS_ADDRESSW, so most likely 0 or NaN 555 */ 556 const float *scale = (const float *)&stateBlock->state.texture_states[texunit][WINED3DTSS_BUMPENVLSCALE]; 557 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 558 gl_shader->luminanceconst[i].const_num, scale)); 559 } 560 } 561 checkGLcall("Load bumpmap consts"); 562 563 if(gl_shader->ycorrection != WINED3D_CONST_NUM_UNUSED) 564 { 565 /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen). 566 * ycorrection.y: -1.0(onscreen), 1.0(offscreen) 567 * ycorrection.z: 1.0 568 * ycorrection.w: 0.0 569 */ 570 float val[4]; 571 val[0] = context->render_offscreen ? 
0.0f 572 : deviceImpl->render_targets[0]->currentDesc.Height; 573 val[1] = context->render_offscreen ? 1.0f : -1.0f; 574 val[2] = 1.0f; 575 val[3] = 0.0f; 576 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->ycorrection, val)); 577 checkGLcall("y correction loading"); 578 } 579 580 if (!gl_shader->num_int_consts) return; 581 582 for(i = 0; i < MAX_CONST_I; i++) 583 { 584 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 585 { 586 float val[4]; 587 val[0] = (float)stateBlock->state.ps_consts_i[4 * i]; 588 val[1] = (float)stateBlock->state.ps_consts_i[4 * i + 1]; 589 val[2] = (float)stateBlock->state.ps_consts_i[4 * i + 2]; 590 val[3] = -1.0f; 591 592 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->int_consts[i], val)); 593 } 594 } 595 checkGLcall("Load ps int consts"); 596 } 597 598 /* GL locking is done by the caller. */ 599 static inline void shader_arb_vs_local_constants(IWineD3DDeviceImpl* deviceImpl) 600 { 601 IWineD3DStateBlockImpl* stateBlock; 602 const struct wined3d_gl_info *gl_info = &deviceImpl->adapter->gl_info; 603 unsigned char i; 604 struct shader_arb_priv *priv = deviceImpl->shader_priv; 605 const struct arb_vs_compiled_shader *gl_shader = priv->compiled_vprog; 606 607 /* Upload the position fixup */ 608 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, deviceImpl->posFixup)); 609 610 if (!gl_shader->num_int_consts) return; 611 612 stateBlock = deviceImpl->stateBlock; 613 614 for(i = 0; i < MAX_CONST_I; i++) 615 { 616 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 617 { 618 float val[4]; 619 val[0] = (float)stateBlock->state.vs_consts_i[4 * i]; 620 val[1] = (float)stateBlock->state.vs_consts_i[4 * i + 1]; 621 val[2] = (float)stateBlock->state.vs_consts_i[4 * i + 2]; 622 val[3] = -1.0f; 623 624 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->int_consts[i], val)); 625 } 626 } 627 checkGLcall("Load vs int consts"); 628 
} 629 630 /** 631 * Loads the app-supplied constants into the currently set ARB_[vertex/fragment]_programs. 632 * 633 * We only support float constants in ARB at the moment, so don't 634 * worry about the Integers or Booleans 635 */ 636 /* GL locking is done by the caller (state handler) */ 637 static void shader_arb_load_constants(const struct wined3d_context *context, char usePixelShader, char useVertexShader) 638 { 639 IWineD3DDeviceImpl *device = context->swapchain->device; 640 IWineD3DStateBlockImpl* stateBlock = device->stateBlock; 641 const struct wined3d_gl_info *gl_info = context->gl_info; 642 643 if (useVertexShader) 644 { 645 IWineD3DBaseShaderImpl *vshader = (IWineD3DBaseShaderImpl *)stateBlock->state.vertex_shader; 646 647 /* Load DirectX 9 float constants for vertex shader */ 648 device->highest_dirty_vs_const = shader_arb_load_constantsF(vshader, gl_info, GL_VERTEX_PROGRAM_ARB, 649 device->highest_dirty_vs_const, stateBlock->state.vs_consts_f, context->vshader_const_dirty); 650 shader_arb_vs_local_constants(device); 651 } 652 653 if (usePixelShader) 654 { 655 IWineD3DBaseShaderImpl *pshader = (IWineD3DBaseShaderImpl *)stateBlock->state.pixel_shader; 656 657 /* Load DirectX 9 float constants for pixel shader */ 658 device->highest_dirty_ps_const = shader_arb_load_constantsF(pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB, 659 device->highest_dirty_ps_const, stateBlock->state.ps_consts_f, context->pshader_const_dirty); 660 shader_arb_ps_local_constants(device); 661 } 662 } 663 664 static void shader_arb_update_float_vertex_constants(IWineD3DDevice *iface, UINT start, UINT count) 665 { 666 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface; 667 struct wined3d_context *context = context_get_current(); 668 669 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 670 * context. 
On a context switch the old context will be fully dirtified */ 671 if (!context || context->swapchain->device != This) return; 672 673 memset(context->vshader_const_dirty + start, 1, sizeof(*context->vshader_const_dirty) * count); 674 This->highest_dirty_vs_const = max(This->highest_dirty_vs_const, start + count); 675 } 676 677 static void shader_arb_update_float_pixel_constants(IWineD3DDevice *iface, UINT start, UINT count) 678 { 679 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface; 680 struct wined3d_context *context = context_get_current(); 681 682 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 683 * context. On a context switch the old context will be fully dirtified */ 684 if (!context || context->swapchain->device != This) return; 685 686 memset(context->pshader_const_dirty + start, 1, sizeof(*context->pshader_const_dirty) * count); 687 This->highest_dirty_ps_const = max(This->highest_dirty_ps_const, start + count); 688 } 689 690 static DWORD *local_const_mapping(IWineD3DBaseShaderImpl *This) 691 { 692 DWORD *ret; 693 DWORD idx = 0; 694 const local_constant *lconst; 695 696 if(This->baseShader.load_local_constsF || list_empty(&This->baseShader.constantsF)) return NULL; 697 698 ret = HeapAlloc(GetProcessHeap(), 0, sizeof(DWORD) * This->baseShader.limits.constant_float); 699 if(!ret) { 700 ERR("Out of memory\n"); 701 return NULL; 702 } 703 704 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) { 705 ret[lconst->idx] = idx++; 706 } 707 return ret; 708 } 709 710 /* Generate the variable & register declarations for the ARB_vertex_program output target */ 711 static DWORD shader_generate_arb_declarations(IWineD3DBaseShader *iface, const shader_reg_maps *reg_maps, 712 struct wined3d_shader_buffer *buffer, const struct wined3d_gl_info *gl_info, DWORD *lconst_map, 713 DWORD *num_clipplanes, struct shader_arb_ctx_priv *ctx) 714 { 715 IWineD3DBaseShaderImpl* This = 
(IWineD3DBaseShaderImpl*) iface; 716 DWORD i, next_local = 0; 717 char pshader = shader_is_pshader_version(reg_maps->shader_version.type); 718 unsigned max_constantsF; 719 const local_constant *lconst; 720 DWORD map; 721 722 /* In pixel shaders, all private constants are program local, we don't need anything 723 * from program.env. Thus we can advertise the full set of constants in pixel shaders. 724 * If we need a private constant the GL implementation will squeeze it in somewhere 725 * 726 * With vertex shaders we need the posFixup and on some GL implementations 4 helper 727 * immediate values. The posFixup is loaded using program.env for now, so always 728 * subtract one from the number of constants. If the shader uses indirect addressing, 729 * account for the helper const too because we have to declare all availabke d3d constants 730 * and don't know which are actually used. 731 */ 732 if (pshader) 733 { 734 max_constantsF = gl_info->limits.arb_ps_native_constants; 735 /* 24 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. */ 736 if (max_constantsF < 24) 737 max_constantsF = gl_info->limits.arb_ps_float_constants; 738 } 739 else 740 { 741 max_constantsF = gl_info->limits.arb_vs_native_constants; 742 /* 96 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. 743 * Also prevents max_constantsF from becoming less than 0 and 744 * wrapping . 
*/ 745 if (max_constantsF < 96) 746 max_constantsF = gl_info->limits.arb_vs_float_constants; 747 748 if(This->baseShader.reg_maps.usesrelconstF) { 749 DWORD highest_constf = 0, clip_limit; 750 751 max_constantsF -= reserved_vs_const(This, gl_info); 752 max_constantsF -= count_bits(This->baseShader.reg_maps.integer_constants); 753 754 for(i = 0; i < This->baseShader.limits.constant_float; i++) 755 { 756 DWORD idx = i >> 5; 757 DWORD shift = i & 0x1f; 758 if(reg_maps->constf[idx] & (1 << shift)) highest_constf = i; 759 } 760 761 if(use_nv_clip(gl_info) && ctx->target_version >= NV2) 762 { 763 if(ctx->cur_vs_args->super.clip_enabled) 764 clip_limit = gl_info->limits.clipplanes; 765 else 766 clip_limit = 0; 767 } 768 else 769 { 770 unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask; 771 clip_limit = min(count_bits(mask), 4); 772 } 773 *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1); 774 max_constantsF -= *num_clipplanes; 775 if(*num_clipplanes < clip_limit) 776 { 777 WARN("Only %u clipplanes out of %u enabled\n", *num_clipplanes, gl_info->limits.clipplanes); 778 } 779 } 780 else 781 { 782 if (ctx->target_version >= NV2) *num_clipplanes = gl_info->limits.clipplanes; 783 else *num_clipplanes = min(gl_info->limits.clipplanes, 4); 784 } 785 } 786 787 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 788 { 789 if (map & 1) shader_addline(buffer, "TEMP R%u;\n", i); 790 } 791 792 for (i = 0, map = reg_maps->address; map; map >>= 1, ++i) 793 { 794 if (map & 1) shader_addline(buffer, "ADDRESS A%u;\n", i); 795 } 796 797 if (pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3) 798 { 799 for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i) 800 { 801 if (map & 1) shader_addline(buffer, "TEMP T%u;\n", i); 802 } 803 } 804 805 /* Load local constants using the program-local space, 806 * this avoids reloading them each time the shader is used 807 */ 808 if(lconst_map) { 809 
LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) { 810 shader_addline(buffer, "PARAM C%u = program.local[%u];\n", lconst->idx, 811 lconst_map[lconst->idx]); 812 next_local = max(next_local, lconst_map[lconst->idx] + 1); 813 } 814 } 815 816 /* After subtracting privately used constants from the hardware limit(they are loaded as 817 * local constants), make sure the shader doesn't violate the env constant limit 818 */ 819 if(pshader) 820 { 821 max_constantsF = min(max_constantsF, gl_info->limits.arb_ps_float_constants); 822 } 823 else 824 { 825 max_constantsF = min(max_constantsF, gl_info->limits.arb_vs_float_constants); 826 } 827 828 /* Avoid declaring more constants than needed */ 829 max_constantsF = min(max_constantsF, This->baseShader.limits.constant_float); 830 831 /* we use the array-based constants array if the local constants are marked for loading, 832 * because then we use indirect addressing, or when the local constant list is empty, 833 * because then we don't know if we're using indirect addressing or not. 
If we're hardcoding 834 * local constants do not declare the loaded constants as an array because ARB compilers usually 835 * do not optimize unused constants away 836 */ 837 if(This->baseShader.reg_maps.usesrelconstF) { 838 /* Need to PARAM the environment parameters (constants) so we can use relative addressing */ 839 shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n", 840 max_constantsF, max_constantsF - 1); 841 } else { 842 for(i = 0; i < max_constantsF; i++) { 843 DWORD idx, mask; 844 idx = i >> 5; 845 mask = 1 << (i & 0x1f); 846 if(!shader_constant_is_local(This, i) && (This->baseShader.reg_maps.constf[idx] & mask)) { 847 shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i); 848 } 849 } 850 } 851 852 return next_local; 853 } 854 855 static const char * const shift_tab[] = { 856 "dummy", /* 0 (none) */ 857 "coefmul.x", /* 1 (x2) */ 858 "coefmul.y", /* 2 (x4) */ 859 "coefmul.z", /* 3 (x8) */ 860 "coefmul.w", /* 4 (x16) */ 861 "dummy", /* 5 (x32) */ 862 "dummy", /* 6 (x64) */ 863 "dummy", /* 7 (x128) */ 864 "dummy", /* 8 (d256) */ 865 "dummy", /* 9 (d128) */ 866 "dummy", /* 10 (d64) */ 867 "dummy", /* 11 (d32) */ 868 "coefdiv.w", /* 12 (d16) */ 869 "coefdiv.z", /* 13 (d8) */ 870 "coefdiv.y", /* 14 (d4) */ 871 "coefdiv.x" /* 15 (d2) */ 872 }; 873 874 static void shader_arb_get_write_mask(const struct wined3d_shader_instruction *ins, 875 const struct wined3d_shader_dst_param *dst, char *write_mask) 876 { 877 char *ptr = write_mask; 878 879 if (dst->write_mask != WINED3DSP_WRITEMASK_ALL) 880 { 881 *ptr++ = '.'; 882 if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x'; 883 if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y'; 884 if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z'; 885 if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w'; 886 } 887 888 *ptr = '\0'; 889 } 890 891 static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str) 892 { 893 /* For registers of type 
WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra", 894 * but addressed as "rgba". To fix this we need to swap the register's x 895 * and z components. */ 896 const char *swizzle_chars = fixup ? "zyxw" : "xyzw"; 897 char *ptr = swizzle_str; 898 899 /* swizzle bits fields: wwzzyyxx */ 900 DWORD swizzle = param->swizzle; 901 DWORD swizzle_x = swizzle & 0x03; 902 DWORD swizzle_y = (swizzle >> 2) & 0x03; 903 DWORD swizzle_z = (swizzle >> 4) & 0x03; 904 DWORD swizzle_w = (swizzle >> 6) & 0x03; 905 906 /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to 907 * generate a swizzle string. Unless we need to our own swizzling. */ 908 if (swizzle != WINED3DSP_NOSWIZZLE || fixup) 909 { 910 *ptr++ = '.'; 911 if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) { 912 *ptr++ = swizzle_chars[swizzle_x]; 913 } else { 914 *ptr++ = swizzle_chars[swizzle_x]; 915 *ptr++ = swizzle_chars[swizzle_y]; 916 *ptr++ = swizzle_chars[swizzle_z]; 917 *ptr++ = swizzle_chars[swizzle_w]; 918 } 919 } 920 921 *ptr = '\0'; 922 } 923 924 static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src) 925 { 926 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 927 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 928 929 if (!strcmp(priv->addr_reg, src)) return; 930 931 strcpy(priv->addr_reg, src); 932 shader_addline(buffer, "ARL A0.x, %s;\n", src); 933 } 934 935 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 936 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr); 937 938 static void shader_arb_get_register_name(const struct wined3d_shader_instruction *ins, 939 const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color) 940 { 941 /* oPos, oFog and oPts in D3D */ 942 static const char * const rastout_reg_names[] = {"TMP_OUT", "result.fogcoord", "result.pointsize"}; 943 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl 
*)ins->ctx->shader; 944 BOOL pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type); 945 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 946 947 *is_color = FALSE; 948 949 switch (reg->type) 950 { 951 case WINED3DSPR_TEMP: 952 sprintf(register_name, "R%u", reg->idx); 953 break; 954 955 case WINED3DSPR_INPUT: 956 if (pshader) 957 { 958 if(This->baseShader.reg_maps.shader_version.major < 3) 959 { 960 if (!reg->idx) strcpy(register_name, "fragment.color.primary"); 961 else strcpy(register_name, "fragment.color.secondary"); 962 } 963 else 964 { 965 if(reg->rel_addr) 966 { 967 char rel_reg[50]; 968 shader_arb_get_src_param(ins, reg->rel_addr, 0, rel_reg); 969 970 if (!strcmp(rel_reg, "**aL_emul**")) 971 { 972 DWORD idx = ctx->aL + reg->idx; 973 if(idx < MAX_REG_INPUT) 974 { 975 strcpy(register_name, ctx->ps_input[idx]); 976 } 977 else 978 { 979 ERR("Pixel shader input register out of bounds: %u\n", idx); 980 sprintf(register_name, "out_of_bounds_%u", idx); 981 } 982 } 983 else if(This->baseShader.reg_maps.input_registers & 0x0300) 984 { 985 /* There are two ways basically: 986 * 987 * 1) Use the unrolling code that is used for loop emulation and unroll the loop. 988 * That means trouble if the loop also contains a breakc or if the control values 989 * aren't local constants. 990 * 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the 991 * source dynamically. The trouble is that we cannot simply read aL.y because it 992 * is an ADDRESS register. 
We could however push it, load .zw with a value and use 993 * ADAC to load the condition code register and pop it again afterwards 994 */ 995 FIXME("Relative input register addressing with more than 8 registers\n"); 996 997 /* This is better than nothing for now */ 998 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx); 999 } 1000 else if(ctx->cur_ps_args->super.vp_mode != vertexshader) 1001 { 1002 /* This is problematic because we'd have to consult the ctx->ps_input strings 1003 * for where to find the varying. Some may be "0.0", others can be texcoords or 1004 * colors. This needs either a pipeline replacement to make the vertex shader feed 1005 * proper varyings, or loop unrolling 1006 * 1007 * For now use the texcoords and hope for the best 1008 */ 1009 FIXME("Non-vertex shader varying input with indirect addressing\n"); 1010 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx); 1011 } 1012 else 1013 { 1014 /* D3D supports indirect addressing only with aL in loop registers. 
The loop instruction 1015 * pulls GL_NV_fragment_program2 in 1016 */ 1017 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx); 1018 } 1019 } 1020 else 1021 { 1022 if(reg->idx < MAX_REG_INPUT) 1023 { 1024 strcpy(register_name, ctx->ps_input[reg->idx]); 1025 } 1026 else 1027 { 1028 ERR("Pixel shader input register out of bounds: %u\n", reg->idx); 1029 sprintf(register_name, "out_of_bounds_%u", reg->idx); 1030 } 1031 } 1032 } 1033 } 1034 else 1035 { 1036 if (ctx->cur_vs_args->super.swizzle_map & (1 << reg->idx)) *is_color = TRUE; 1037 sprintf(register_name, "vertex.attrib[%u]", reg->idx); 1038 } 1039 break; 1040 1041 case WINED3DSPR_CONST: 1042 if (!pshader && reg->rel_addr) 1043 { 1044 BOOL aL = FALSE; 1045 char rel_reg[50]; 1046 UINT rel_offset = ((IWineD3DVertexShaderImpl *)This)->rel_offset; 1047 if(This->baseShader.reg_maps.shader_version.major < 2) { 1048 sprintf(rel_reg, "A0.x"); 1049 } else { 1050 shader_arb_get_src_param(ins, reg->rel_addr, 0, rel_reg); 1051 if(ctx->target_version == ARB) { 1052 if (!strcmp(rel_reg, "**aL_emul**")) 1053 { 1054 aL = TRUE; 1055 } else { 1056 shader_arb_request_a0(ins, rel_reg); 1057 sprintf(rel_reg, "A0.x"); 1058 } 1059 } 1060 } 1061 if(aL) 1062 sprintf(register_name, "C[%u]", ctx->aL + reg->idx); 1063 else if (reg->idx >= rel_offset) 1064 sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx - rel_offset); 1065 else 1066 sprintf(register_name, "C[%s - %u]", rel_reg, rel_offset - reg->idx); 1067 } 1068 else 1069 { 1070 if (This->baseShader.reg_maps.usesrelconstF) 1071 sprintf(register_name, "C[%u]", reg->idx); 1072 else 1073 sprintf(register_name, "C%u", reg->idx); 1074 } 1075 break; 1076 1077 case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */ 1078 if (pshader) { 1079 if(This->baseShader.reg_maps.shader_version.major == 1 && 1080 This->baseShader.reg_maps.shader_version.minor <= 3) { 1081 /* In ps <= 1.3, Tx is a temporary register as destination to all instructions, 1082 * and as source to most 
instructions. For some instructions it is the texcoord 1083 * input. Those instructions know about the special use 1084 */ 1085 sprintf(register_name, "T%u", reg->idx); 1086 } else { 1087 /* in ps 1.4 and 2.x Tx is always a (read-only) varying */ 1088 sprintf(register_name, "fragment.texcoord[%u]", reg->idx); 1089 } 1090 } 1091 else 1092 { 1093 if(This->baseShader.reg_maps.shader_version.major == 1 || ctx->target_version >= NV2) 1094 { 1095 sprintf(register_name, "A%u", reg->idx); 1096 } 1097 else 1098 { 1099 sprintf(register_name, "A%u_SHADOW", reg->idx); 1100 } 1101 } 1102 break; 1103 1104 case WINED3DSPR_COLOROUT: 1105 if (ctx->cur_ps_args->super.srgb_correction && !reg->idx) 1106 { 1107 strcpy(register_name, "TMP_COLOR"); 1108 } 1109 else 1110 { 1111 if(ctx->cur_ps_args->super.srgb_correction) FIXME("sRGB correction on higher render targets\n"); 1112 if(This->baseShader.reg_maps.highest_render_target > 0) 1113 { 1114 sprintf(register_name, "result.color[%u]", reg->idx); 1115 } 1116 else 1117 { 1118 strcpy(register_name, "result.color"); 1119 } 1120 } 1121 break; 1122 1123 case WINED3DSPR_RASTOUT: 1124 if(reg->idx == 1) sprintf(register_name, "%s", ctx->fog_output); 1125 else sprintf(register_name, "%s", rastout_reg_names[reg->idx]); 1126 break; 1127 1128 case WINED3DSPR_DEPTHOUT: 1129 strcpy(register_name, "result.depth"); 1130 break; 1131 1132 case WINED3DSPR_ATTROUT: 1133 /* case WINED3DSPR_OUTPUT: */ 1134 if (pshader) sprintf(register_name, "oD[%u]", reg->idx); 1135 else strcpy(register_name, ctx->color_output[reg->idx]); 1136 break; 1137 1138 case WINED3DSPR_TEXCRDOUT: 1139 if (pshader) 1140 { 1141 sprintf(register_name, "oT[%u]", reg->idx); 1142 } 1143 else 1144 { 1145 if(This->baseShader.reg_maps.shader_version.major < 3) 1146 { 1147 strcpy(register_name, ctx->texcrd_output[reg->idx]); 1148 } 1149 else 1150 { 1151 strcpy(register_name, ctx->vs_output[reg->idx]); 1152 } 1153 } 1154 break; 1155 1156 case WINED3DSPR_LOOP: 1157 if(ctx->target_version >= NV2) 
1158 { 1159 /* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */ 1160 if(pshader) sprintf(register_name, "A0.x"); 1161 else sprintf(register_name, "aL.y"); 1162 } 1163 else 1164 { 1165 /* Unfortunately this code cannot return the value of ctx->aL here. An immediate value 1166 * would be valid, but if aL is used for indexing(its only use), there's likely an offset, 1167 * thus the result would be something like C[15 + 30], which is not valid in the ARB program 1168 * grammar. So return a marker for the emulated aL and intercept it in constant and varying 1169 * indexing 1170 */ 1171 sprintf(register_name, "**aL_emul**"); 1172 } 1173 1174 break; 1175 1176 case WINED3DSPR_CONSTINT: 1177 sprintf(register_name, "I%u", reg->idx); 1178 break; 1179 1180 case WINED3DSPR_MISCTYPE: 1181 if (!reg->idx) 1182 { 1183 sprintf(register_name, "vpos"); 1184 } 1185 else if(reg->idx == 1) 1186 { 1187 sprintf(register_name, "fragment.facing.x"); 1188 } 1189 else 1190 { 1191 FIXME("Unknown MISCTYPE register index %u\n", reg->idx); 1192 } 1193 break; 1194 1195 default: 1196 FIXME("Unhandled register type %#x[%u]\n", reg->type, reg->idx); 1197 sprintf(register_name, "unrecognized_register[%u]", reg->idx); 1198 break; 1199 } 1200 } 1201 1202 static void shader_arb_get_dst_param(const struct wined3d_shader_instruction *ins, 1203 const struct wined3d_shader_dst_param *wined3d_dst, char *str) 1204 { 1205 char register_name[255]; 1206 char write_mask[6]; 1207 BOOL is_color; 1208 1209 shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color); 1210 strcpy(str, register_name); 1211 1212 shader_arb_get_write_mask(ins, wined3d_dst, write_mask); 1213 strcat(str, write_mask); 1214 } 1215 1216 static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source) 1217 { 1218 switch(channel_source) 1219 { 1220 case CHANNEL_SOURCE_ZERO: return "0"; 1221 case CHANNEL_SOURCE_ONE: return "1"; 1222 case CHANNEL_SOURCE_X: return "x"; 
1223 case CHANNEL_SOURCE_Y: return "y"; 1224 case CHANNEL_SOURCE_Z: return "z"; 1225 case CHANNEL_SOURCE_W: return "w"; 1226 default: 1227 FIXME("Unhandled channel source %#x\n", channel_source); 1228 return "undefined"; 1229 } 1230 } 1231 1232 static void gen_color_correction(struct wined3d_shader_buffer *buffer, const char *reg, 1233 DWORD dst_mask, const char *one, const char *two, struct color_fixup_desc fixup) 1234 { 1235 DWORD mask; 1236 1237 if (is_complex_fixup(fixup)) 1238 { 1239 enum complex_fixup complex_fixup = get_complex_fixup(fixup); 1240 FIXME("Complex fixup (%#x) not supported\n", complex_fixup); 1241 return; 1242 } 1243 1244 mask = 0; 1245 if (fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0; 1246 if (fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1; 1247 if (fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2; 1248 if (fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3; 1249 mask &= dst_mask; 1250 1251 if (mask) 1252 { 1253 shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", reg, reg, 1254 shader_arb_get_fixup_swizzle(fixup.x_source), shader_arb_get_fixup_swizzle(fixup.y_source), 1255 shader_arb_get_fixup_swizzle(fixup.z_source), shader_arb_get_fixup_swizzle(fixup.w_source)); 1256 } 1257 1258 mask = 0; 1259 if (fixup.x_sign_fixup) mask |= WINED3DSP_WRITEMASK_0; 1260 if (fixup.y_sign_fixup) mask |= WINED3DSP_WRITEMASK_1; 1261 if (fixup.z_sign_fixup) mask |= WINED3DSP_WRITEMASK_2; 1262 if (fixup.w_sign_fixup) mask |= WINED3DSP_WRITEMASK_3; 1263 mask &= dst_mask; 1264 1265 if (mask) 1266 { 1267 char reg_mask[6]; 1268 char *ptr = reg_mask; 1269 1270 if (mask != WINED3DSP_WRITEMASK_ALL) 1271 { 1272 *ptr++ = '.'; 1273 if (mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x'; 1274 if (mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y'; 1275 if (mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z'; 1276 if (mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w'; 1277 } 1278 *ptr = '\0'; 1279 1280 shader_addline(buffer, "MAD 
%s%s, %s, %s, -%s;\n", reg, reg_mask, reg, two, one); 1281 } 1282 } 1283 1284 static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins) 1285 { 1286 DWORD mod; 1287 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1288 if (!ins->dst_count) return ""; 1289 1290 mod = ins->dst[0].modifiers; 1291 1292 /* Silently ignore PARTIALPRECISION if its not supported */ 1293 if(priv->target_version == ARB) mod &= ~WINED3DSPDM_PARTIALPRECISION; 1294 1295 if(mod & WINED3DSPDM_MSAMPCENTROID) 1296 { 1297 FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n"); 1298 mod &= ~WINED3DSPDM_MSAMPCENTROID; 1299 } 1300 1301 switch(mod) 1302 { 1303 case WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION: 1304 return "H_SAT"; 1305 1306 case WINED3DSPDM_SATURATE: 1307 return "_SAT"; 1308 1309 case WINED3DSPDM_PARTIALPRECISION: 1310 return "H"; 1311 1312 case 0: 1313 return ""; 1314 1315 default: 1316 FIXME("Unknown modifiers 0x%08x\n", mod); 1317 return ""; 1318 } 1319 } 1320 1321 #define TEX_PROJ 0x1 1322 #define TEX_BIAS 0x2 1323 #define TEX_LOD 0x4 1324 #define TEX_DERIV 0x10 1325 1326 static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx, 1327 const char *dst_str, const char *coord_reg, WORD flags, const char *dsx, const char *dsy) 1328 { 1329 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1330 DWORD sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx]; 1331 IWineD3DBaseTextureImpl *texture; 1332 const char *tex_type; 1333 BOOL np2_fixup = FALSE; 1334 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader; 1335 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device; 1336 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1337 const char *mod; 1338 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 1339 1340 /* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */ 1341 if(!pshader) 
sampler_idx += MAX_FRAGMENT_SAMPLERS; 1342 1343 switch(sampler_type) { 1344 case WINED3DSTT_1D: 1345 tex_type = "1D"; 1346 break; 1347 1348 case WINED3DSTT_2D: 1349 texture = device->stateBlock->state.textures[sampler_idx]; 1350 if (texture && texture->baseTexture.target == GL_TEXTURE_RECTANGLE_ARB) 1351 { 1352 tex_type = "RECT"; 1353 } else { 1354 tex_type = "2D"; 1355 } 1356 if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) 1357 { 1358 if (priv->cur_np2fixup_info->super.active & (1 << sampler_idx)) 1359 { 1360 if (flags) FIXME("Only ordinary sampling from NP2 textures is supported.\n"); 1361 else np2_fixup = TRUE; 1362 } 1363 } 1364 break; 1365 1366 case WINED3DSTT_VOLUME: 1367 tex_type = "3D"; 1368 break; 1369 1370 case WINED3DSTT_CUBE: 1371 tex_type = "CUBE"; 1372 break; 1373 1374 default: 1375 ERR("Unexpected texture type %d\n", sampler_type); 1376 tex_type = ""; 1377 } 1378 1379 /* TEX, TXL, TXD and TXP do not support the "H" modifier, 1380 * so don't use shader_arb_get_modifier 1381 */ 1382 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) mod = "_SAT"; 1383 else mod = ""; 1384 1385 /* Fragment samplers always have indentity mapping */ 1386 if(sampler_idx >= MAX_FRAGMENT_SAMPLERS) 1387 { 1388 sampler_idx = priv->cur_vs_args->vertex.samplers[sampler_idx - MAX_FRAGMENT_SAMPLERS]; 1389 } 1390 1391 if (flags & TEX_DERIV) 1392 { 1393 if(flags & TEX_PROJ) FIXME("Projected texture sampling with custom derivatives\n"); 1394 if(flags & TEX_BIAS) FIXME("Biased texture sampling with custom derivatives\n"); 1395 shader_addline(buffer, "TXD%s %s, %s, %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, 1396 dsx, dsy,sampler_idx, tex_type); 1397 } 1398 else if(flags & TEX_LOD) 1399 { 1400 if(flags & TEX_PROJ) FIXME("Projected texture sampling with explicit lod\n"); 1401 if(flags & TEX_BIAS) FIXME("Biased texture sampling with explicit lod\n"); 1402 shader_addline(buffer, "TXL%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, 1403 sampler_idx, 
tex_type); 1404 } 1405 else if (flags & TEX_BIAS) 1406 { 1407 /* Shouldn't be possible, but let's check for it */ 1408 if(flags & TEX_PROJ) FIXME("Biased and Projected texture sampling\n"); 1409 /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */ 1410 shader_addline(buffer, "TXB%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type); 1411 } 1412 else if (flags & TEX_PROJ) 1413 { 1414 shader_addline(buffer, "TXP%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type); 1415 } 1416 else 1417 { 1418 if (np2_fixup) 1419 { 1420 const unsigned char idx = priv->cur_np2fixup_info->super.idx[sampler_idx]; 1421 shader_addline(buffer, "MUL TA, np2fixup[%u].%s, %s;\n", idx >> 1, 1422 (idx % 2) ? "zwxy" : "xyzw", coord_reg); 1423 1424 shader_addline(buffer, "TEX%s %s, TA, texture[%u], %s;\n", mod, dst_str, sampler_idx, tex_type); 1425 } 1426 else 1427 shader_addline(buffer, "TEX%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type); 1428 } 1429 1430 if (pshader) 1431 { 1432 gen_color_correction(buffer, dst_str, ins->dst[0].write_mask, 1433 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_ONE), 1434 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_TWO), 1435 priv->cur_ps_args->super.color_fixup[sampler_idx]); 1436 } 1437 } 1438 1439 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 1440 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr) 1441 { 1442 /* Generate a line that does the input modifier computation and return the input register to use */ 1443 BOOL is_color = FALSE; 1444 char regstr[256]; 1445 char swzstr[20]; 1446 int insert_line; 1447 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1448 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1449 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 1450 const char *two = 
arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_TWO); 1451 1452 /* Assume a new line will be added */ 1453 insert_line = 1; 1454 1455 /* Get register name */ 1456 shader_arb_get_register_name(ins, &src->reg, regstr, &is_color); 1457 shader_arb_get_swizzle(src, is_color, swzstr); 1458 1459 switch (src->modifiers) 1460 { 1461 case WINED3DSPSM_NONE: 1462 sprintf(outregstr, "%s%s", regstr, swzstr); 1463 insert_line = 0; 1464 break; 1465 case WINED3DSPSM_NEG: 1466 sprintf(outregstr, "-%s%s", regstr, swzstr); 1467 insert_line = 0; 1468 break; 1469 case WINED3DSPSM_BIAS: 1470 shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr); 1471 break; 1472 case WINED3DSPSM_BIASNEG: 1473 shader_addline(buffer, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg, regstr); 1474 break; 1475 case WINED3DSPSM_SIGN: 1476 shader_addline(buffer, "MAD T%c, %s, %s, -%s;\n", 'A' + tmpreg, regstr, two, one); 1477 break; 1478 case WINED3DSPSM_SIGNNEG: 1479 shader_addline(buffer, "MAD T%c, %s, %s, %s;\n", 'A' + tmpreg, regstr, two, one); 1480 break; 1481 case WINED3DSPSM_COMP: 1482 shader_addline(buffer, "SUB T%c, %s, %s;\n", 'A' + tmpreg, one, regstr); 1483 break; 1484 case WINED3DSPSM_X2: 1485 shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr); 1486 break; 1487 case WINED3DSPSM_X2NEG: 1488 shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr); 1489 break; 1490 case WINED3DSPSM_DZ: 1491 shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr); 1492 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 1493 break; 1494 case WINED3DSPSM_DW: 1495 shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr); 1496 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 1497 break; 1498 case WINED3DSPSM_ABS: 1499 if(ctx->target_version >= NV2) { 1500 sprintf(outregstr, "|%s%s|", regstr, swzstr); 1501 insert_line = 0; 1502 } else { 1503 shader_addline(buffer, 
"ABS T%c, %s;\n", 'A' + tmpreg, regstr); 1504 } 1505 break; 1506 case WINED3DSPSM_ABSNEG: 1507 if(ctx->target_version >= NV2) { 1508 sprintf(outregstr, "-|%s%s|", regstr, swzstr); 1509 } else { 1510 shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr); 1511 sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr); 1512 } 1513 insert_line = 0; 1514 break; 1515 default: 1516 sprintf(outregstr, "%s%s", regstr, swzstr); 1517 insert_line = 0; 1518 } 1519 1520 /* Return modified or original register, with swizzle */ 1521 if (insert_line) 1522 sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr); 1523 } 1524 1525 static void pshader_hw_bem(const struct wined3d_shader_instruction *ins) 1526 { 1527 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1528 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1529 char dst_name[50]; 1530 char src_name[2][50]; 1531 DWORD sampler_code = dst->reg.idx; 1532 1533 shader_arb_get_dst_param(ins, dst, dst_name); 1534 1535 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 1536 * 1537 * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid 1538 * with bem. So delay loading the first parameter until after the perturbation calculation which needs two 1539 * temps is done. 
1540 */ 1541 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1542 shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code); 1543 shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]); 1544 shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code); 1545 shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]); 1546 1547 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1548 shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]); 1549 } 1550 1551 static DWORD negate_modifiers(DWORD mod, char *extra_char) 1552 { 1553 *extra_char = ' '; 1554 switch(mod) 1555 { 1556 case WINED3DSPSM_NONE: return WINED3DSPSM_NEG; 1557 case WINED3DSPSM_NEG: return WINED3DSPSM_NONE; 1558 case WINED3DSPSM_BIAS: return WINED3DSPSM_BIASNEG; 1559 case WINED3DSPSM_BIASNEG: return WINED3DSPSM_BIAS; 1560 case WINED3DSPSM_SIGN: return WINED3DSPSM_SIGNNEG; 1561 case WINED3DSPSM_SIGNNEG: return WINED3DSPSM_SIGN; 1562 case WINED3DSPSM_COMP: *extra_char = '-'; return WINED3DSPSM_COMP; 1563 case WINED3DSPSM_X2: return WINED3DSPSM_X2NEG; 1564 case WINED3DSPSM_X2NEG: return WINED3DSPSM_X2; 1565 case WINED3DSPSM_DZ: *extra_char = '-'; return WINED3DSPSM_DZ; 1566 case WINED3DSPSM_DW: *extra_char = '-'; return WINED3DSPSM_DW; 1567 case WINED3DSPSM_ABS: return WINED3DSPSM_ABSNEG; 1568 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 1569 } 1570 FIXME("Unknown modifier %u\n", mod); 1571 return mod; 1572 } 1573 1574 static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins) 1575 { 1576 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1577 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1578 char dst_name[50]; 1579 char src_name[3][50]; 1580 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 1581 ins->ctx->reg_maps->shader_version.minor); 1582 BOOL is_color; 1583 1584 shader_arb_get_dst_param(ins, dst, dst_name); 1585 shader_arb_get_src_param(ins, &ins->src[1], 1, 
src_name[1]); 1586 1587 /* The coissue flag changes the semantic of the cnd instruction in <= 1.3 shaders */ 1588 if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue) 1589 { 1590 shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]); 1591 } else { 1592 struct wined3d_shader_src_param src0_copy = ins->src[0]; 1593 char extra_neg; 1594 1595 /* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */ 1596 src0_copy.modifiers = negate_modifiers(src0_copy.modifiers, &extra_neg); 1597 1598 shader_arb_get_src_param(ins, &src0_copy, 0, src_name[0]); 1599 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 1600 shader_addline(buffer, "ADD TA, %c%s, coefdiv.x;\n", extra_neg, src_name[0]); 1601 /* No modifiers supported on CMP */ 1602 shader_addline(buffer, "CMP %s, TA, %s, %s;\n", dst_name, src_name[1], src_name[2]); 1603 1604 /* _SAT on CMP doesn't make much sense, but it is not a pure NOP */ 1605 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) 1606 { 1607 shader_arb_get_register_name(ins, &dst->reg, src_name[0], &is_color); 1608 shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, dst_name); 1609 } 1610 } 1611 } 1612 1613 static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins) 1614 { 1615 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1616 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1617 char dst_name[50]; 1618 char src_name[3][50]; 1619 BOOL is_color; 1620 1621 shader_arb_get_dst_param(ins, dst, dst_name); 1622 1623 /* Generate input register names (with modifiers) */ 1624 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1625 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1626 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 1627 1628 /* No modifiers are supported on CMP */ 1629 shader_addline(buffer, "CMP %s, %s, %s, %s;\n", dst_name, 1630 src_name[0], src_name[2], src_name[1]); 1631 1632 
if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) 1633 { 1634 shader_arb_get_register_name(ins, &dst->reg, src_name[0], &is_color); 1635 shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name[0]); 1636 } 1637 } 1638 1639 /** Process the WINED3DSIO_DP2ADD instruction in ARB. 1640 * dst = dot2(src0, src1) + src2 */ 1641 static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins) 1642 { 1643 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1644 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1645 char dst_name[50]; 1646 char src_name[3][50]; 1647 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1648 1649 shader_arb_get_dst_param(ins, dst, dst_name); 1650 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 1651 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 1652 1653 if(ctx->target_version >= NV3) 1654 { 1655 /* GL_NV_fragment_program2 has a 1:1 matching instruction */ 1656 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1657 shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1658 dst_name, src_name[0], src_name[1], src_name[2]); 1659 } 1660 else if(ctx->target_version >= NV2) 1661 { 1662 /* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y 1663 * dst.y = src2.?, src0.x, src1.z + src0.y * src1.w 1664 * dst.z = src2.?, src0.x, src1.x + src0.y * src1.y 1665 * dst.z = src2.?, src0.x, src1.z + src0.y * src1.w 1666 * 1667 * Make sure that src1.zw = src1.xy, then we get a classic dp2add 1668 * 1669 * .xyxy and other swizzles that we could get with this are not valid in 1670 * plain ARBfp, but luckily the NV extension grammar lifts this limitation. 
1671 */ 1672 struct wined3d_shader_src_param tmp_param = ins->src[1]; 1673 DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */ 1674 tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */ 1675 1676 shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]); 1677 1678 shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 1679 dst_name, src_name[2], src_name[0], src_name[1]); 1680 } 1681 else 1682 { 1683 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 1684 /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2] 1685 * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite 1686 */ 1687 shader_addline(buffer, "MOV TA, %s;\n", src_name[0]); 1688 shader_addline(buffer, "MOV TA.z, 0.0;\n"); 1689 shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]); 1690 shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]); 1691 } 1692 } 1693 1694 /* Map the opcode 1-to-1 to the GL code */ 1695 static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins) 1696 { 1697 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1698 const char *instruction; 1699 char arguments[256], dst_str[50]; 1700 unsigned int i; 1701 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1702 1703 switch (ins->handler_idx) 1704 { 1705 case WINED3DSIH_ABS: instruction = "ABS"; break; 1706 case WINED3DSIH_ADD: instruction = "ADD"; break; 1707 case WINED3DSIH_CRS: instruction = "XPD"; break; 1708 case WINED3DSIH_DP3: instruction = "DP3"; break; 1709 case WINED3DSIH_DP4: instruction = "DP4"; break; 1710 case WINED3DSIH_DST: instruction = "DST"; break; 1711 case WINED3DSIH_FRC: instruction = "FRC"; break; 1712 case WINED3DSIH_LIT: instruction = "LIT"; break; 1713 case WINED3DSIH_LRP: instruction = "LRP"; break; 1714 case WINED3DSIH_MAD: instruction = "MAD"; break; 1715 case WINED3DSIH_MAX: instruction = "MAX"; 
break; 1716 case WINED3DSIH_MIN: instruction = "MIN"; break; 1717 case WINED3DSIH_MOV: instruction = "MOV"; break; 1718 case WINED3DSIH_MUL: instruction = "MUL"; break; 1719 case WINED3DSIH_SGE: instruction = "SGE"; break; 1720 case WINED3DSIH_SLT: instruction = "SLT"; break; 1721 case WINED3DSIH_SUB: instruction = "SUB"; break; 1722 case WINED3DSIH_MOVA:instruction = "ARR"; break; 1723 case WINED3DSIH_DSX: instruction = "DDX"; break; 1724 default: instruction = ""; 1725 FIXME("Unhandled opcode %#x\n", ins->handler_idx); 1726 break; 1727 } 1728 1729 /* Note that shader_arb_add_dst_param() adds spaces. */ 1730 arguments[0] = '\0'; 1731 shader_arb_get_dst_param(ins, dst, dst_str); 1732 for (i = 0; i < ins->src_count; ++i) 1733 { 1734 char operand[100]; 1735 strcat(arguments, ", "); 1736 shader_arb_get_src_param(ins, &ins->src[i], i, operand); 1737 strcat(arguments, operand); 1738 } 1739 shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments); 1740 } 1741 1742 static void shader_hw_nop(const struct wined3d_shader_instruction *ins) 1743 { 1744 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1745 shader_addline(buffer, "NOP;\n"); 1746 } 1747 1748 static void shader_hw_mov(const struct wined3d_shader_instruction *ins) 1749 { 1750 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader; 1751 BOOL pshader = shader_is_pshader_version(shader->baseShader.reg_maps.shader_version.type); 1752 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 1753 const char *zero = arb_get_helper_value(shader->baseShader.reg_maps.shader_version.type, ARB_ZERO); 1754 const char *one = arb_get_helper_value(shader->baseShader.reg_maps.shader_version.type, ARB_ONE); 1755 const char *two = arb_get_helper_value(shader->baseShader.reg_maps.shader_version.type, ARB_TWO); 1756 1757 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1758 char src0_param[256]; 1759 1760 if(ins->handler_idx == WINED3DSIH_MOVA) { 1761 
char write_mask[6]; 1762 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 1763 1764 if(ctx->target_version >= NV2) { 1765 shader_hw_map2gl(ins); 1766 return; 1767 } 1768 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param); 1769 shader_arb_get_write_mask(ins, &ins->dst[0], write_mask); 1770 1771 /* This implements the mova formula used in GLSL. The first two instructions 1772 * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0 1773 * in this case: 1774 * mova A0.x, 0.0 1775 * 1776 * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor 1777 * 1778 * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into 1779 * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign. 1780 */ 1781 shader_addline(buffer, "SGE A0_SHADOW%s, %s, %s;\n", write_mask, src0_param, zero); 1782 shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, %s, -%s;\n", write_mask, two, one); 1783 1784 shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param); 1785 shader_addline(buffer, "ADD TA%s, TA, rel_addr_const.x;\n", write_mask); 1786 shader_addline(buffer, "FLR TA%s, TA;\n", write_mask); 1787 if (((IWineD3DVertexShaderImpl *)shader)->rel_offset) 1788 { 1789 shader_addline(buffer, "ADD TA%s, TA, %s;\n", write_mask, offset); 1790 } 1791 shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask); 1792 1793 ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0'; 1794 } else if (ins->ctx->reg_maps->shader_version.major == 1 1795 && !shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type) 1796 && ins->dst[0].reg.type == WINED3DSPR_ADDR) 1797 { 1798 src0_param[0] = '\0'; 1799 if (((IWineD3DVertexShaderImpl *)shader)->rel_offset) 1800 { 1801 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 1802 shader_arb_get_src_param(ins, &ins->src[0], 0, 
src0_param); 1803 shader_addline(buffer, "ADD TA.x, %s, %s;\n", src0_param, offset); 1804 shader_addline(buffer, "ARL A0.x, TA.x;\n"); 1805 } 1806 else 1807 { 1808 /* Apple's ARB_vertex_program implementation does not accept an ARL source argument 1809 * with more than one component. Thus replicate the first source argument over all 1810 * 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */ 1811 struct wined3d_shader_src_param tmp_src = ins->src[0]; 1812 tmp_src.swizzle = (tmp_src.swizzle & 0x3) * 0x55; 1813 shader_arb_get_src_param(ins, &tmp_src, 0, src0_param); 1814 shader_addline(buffer, "ARL A0.x, %s;\n", src0_param); 1815 } 1816 } 1817 else if (ins->dst[0].reg.type == WINED3DSPR_COLOROUT && !ins->dst[0].reg.idx && pshader) 1818 { 1819 IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) shader; 1820 if(ctx->cur_ps_args->super.srgb_correction && ps->color0_mov) 1821 { 1822 shader_addline(buffer, "#mov handled in srgb write code\n"); 1823 return; 1824 } 1825 shader_hw_map2gl(ins); 1826 } 1827 else 1828 { 1829 shader_hw_map2gl(ins); 1830 } 1831 } 1832 1833 static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins) 1834 { 1835 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1836 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1837 char reg_dest[40]; 1838 1839 /* No swizzles are allowed in d3d's texkill. PS 1.x ignores the 4th component as documented, 1840 * but >= 2.0 honors it(undocumented, but tested by the d3d9 testsuit) 1841 */ 1842 shader_arb_get_dst_param(ins, dst, reg_dest); 1843 1844 if (ins->ctx->reg_maps->shader_version.major >= 2) 1845 { 1846 const char *kilsrc = "TA"; 1847 BOOL is_color; 1848 1849 shader_arb_get_register_name(ins, &dst->reg, reg_dest, &is_color); 1850 if(dst->write_mask == WINED3DSP_WRITEMASK_ALL) 1851 { 1852 kilsrc = reg_dest; 1853 } 1854 else 1855 { 1856 /* Sigh. KIL doesn't support swizzles/writemasks. 
KIL passes a writemask, but ".xy" for example 1857 * is not valid as a swizzle in ARB (needs ".xyyy"). Use SWZ to load the register properly, and set 1858 * masked out components to 0(won't kill) 1859 */ 1860 char x = '0', y = '0', z = '0', w = '0'; 1861 if(dst->write_mask & WINED3DSP_WRITEMASK_0) x = 'x'; 1862 if(dst->write_mask & WINED3DSP_WRITEMASK_1) y = 'y'; 1863 if(dst->write_mask & WINED3DSP_WRITEMASK_2) z = 'z'; 1864 if(dst->write_mask & WINED3DSP_WRITEMASK_3) w = 'w'; 1865 shader_addline(buffer, "SWZ TA, %s, %c, %c, %c, %c;\n", reg_dest, x, y, z, w); 1866 } 1867 shader_addline(buffer, "KIL %s;\n", kilsrc); 1868 } else { 1869 /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component, 1870 * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL 1871 * 1872 * ps_1_3 shaders use the texcoord incarnation of the Tx register. ps_1_4 shaders can use the same, 1873 * or pass in any temporary register(in shader phase 2) 1874 */ 1875 if(ins->ctx->reg_maps->shader_version.minor <= 3) { 1876 sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx); 1877 } else { 1878 shader_arb_get_dst_param(ins, dst, reg_dest); 1879 } 1880 shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest); 1881 shader_addline(buffer, "KIL TA;\n"); 1882 } 1883 } 1884 1885 static void pshader_hw_tex(const struct wined3d_shader_instruction *ins) 1886 { 1887 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1888 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1889 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 1890 ins->ctx->reg_maps->shader_version.minor); 1891 struct wined3d_shader_src_param src; 1892 1893 char reg_dest[40]; 1894 char reg_coord[40]; 1895 DWORD reg_sampler_code; 1896 WORD myflags = 0; 1897 1898 /* All versions have a destination register */ 1899 shader_arb_get_dst_param(ins, dst, reg_dest); 1900 1901 /* 1.0-1.4: Use destination register 
number as texture code. 1902 2.0+: Use provided sampler number as texure code. */ 1903 if (shader_version < WINED3D_SHADER_VERSION(2,0)) 1904 reg_sampler_code = dst->reg.idx; 1905 else 1906 reg_sampler_code = ins->src[1].reg.idx; 1907 1908 /* 1.0-1.3: Use the texcoord varying. 1909 1.4+: Use provided coordinate source register. */ 1910 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 1911 sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code); 1912 else { 1913 /* TEX is the only instruction that can handle DW and DZ natively */ 1914 src = ins->src[0]; 1915 if(src.modifiers == WINED3DSPSM_DW) src.modifiers = WINED3DSPSM_NONE; 1916 if(src.modifiers == WINED3DSPSM_DZ) src.modifiers = WINED3DSPSM_NONE; 1917 shader_arb_get_src_param(ins, &src, 0, reg_coord); 1918 } 1919 1920 /* projection flag: 1921 * 1.1, 1.2, 1.3: Use WINED3DTSS_TEXTURETRANSFORMFLAGS 1922 * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0] 1923 * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode 1924 */ 1925 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 1926 { 1927 DWORD flags = 0; 1928 if (reg_sampler_code < MAX_TEXTURES) 1929 flags = priv->cur_ps_args->super.tex_transform >> reg_sampler_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 1930 if (flags & WINED3D_PSARGS_PROJECTED) 1931 myflags |= TEX_PROJ; 1932 } 1933 else if (shader_version < WINED3D_SHADER_VERSION(2,0)) 1934 { 1935 DWORD src_mod = ins->src[0].modifiers; 1936 if (src_mod == WINED3DSPSM_DZ) { 1937 /* TXP cannot handle DZ natively, so move the z coordinate to .w. 
reg_coord is a read-only 1938 * varying register, so we need a temp reg 1939 */ 1940 shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord); 1941 strcpy(reg_coord, "TA"); 1942 myflags |= TEX_PROJ; 1943 } else if(src_mod == WINED3DSPSM_DW) { 1944 myflags |= TEX_PROJ; 1945 } 1946 } else { 1947 if (ins->flags & WINED3DSI_TEXLD_PROJECT) myflags |= TEX_PROJ; 1948 if (ins->flags & WINED3DSI_TEXLD_BIAS) myflags |= TEX_BIAS; 1949 } 1950 shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, myflags, NULL, NULL); 1951 } 1952 1953 static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins) 1954 { 1955 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 1956 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1957 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 1958 ins->ctx->reg_maps->shader_version.minor); 1959 char dst_str[50]; 1960 1961 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 1962 { 1963 DWORD reg = dst->reg.idx; 1964 1965 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 1966 shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg); 1967 } else { 1968 char reg_src[40]; 1969 1970 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src); 1971 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 1972 shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src); 1973 } 1974 } 1975 1976 static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins) 1977 { 1978 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 1979 DWORD flags = 0; 1980 1981 DWORD reg1 = ins->dst[0].reg.idx; 1982 char dst_str[50]; 1983 char src_str[50]; 1984 1985 /* Note that texreg2ar treats Tx as a temporary register, not as a varying */ 1986 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 1987 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 1988 /* Move .x first in case src_str is "TA" */ 1989 shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str); 
1990 shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str); 1991 if (reg1 < MAX_TEXTURES) 1992 { 1993 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 1994 flags = priv->cur_ps_args->super.tex_transform >> reg1 * WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 1995 } 1996 shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 1997 } 1998 1999 static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins) 2000 { 2001 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2002 2003 DWORD reg1 = ins->dst[0].reg.idx; 2004 char dst_str[50]; 2005 char src_str[50]; 2006 2007 /* Note that texreg2gb treats Tx as a temporary register, not as a varying */ 2008 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2009 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2010 shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str); 2011 shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str); 2012 shader_hw_sample(ins, reg1, dst_str, "TA", 0, NULL, NULL); 2013 } 2014 2015 static void pshader_hw_texreg2rgb(const struct wined3d_shader_instruction *ins) 2016 { 2017 DWORD reg1 = ins->dst[0].reg.idx; 2018 char dst_str[50]; 2019 char src_str[50]; 2020 2021 /* Note that texreg2rg treats Tx as a temporary register, not as a varying */ 2022 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2023 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 2024 shader_hw_sample(ins, reg1, dst_str, src_str, 0, NULL, NULL); 2025 } 2026 2027 static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins) 2028 { 2029 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2030 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2031 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2032 char reg_coord[40], dst_reg[50], src_reg[50]; 2033 DWORD reg_dest_code; 2034 2035 /* All versions have a destination register. 
The Tx where the texture coordinates come 2036 * from is the varying incarnation of the texture register 2037 */ 2038 reg_dest_code = dst->reg.idx; 2039 shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg); 2040 shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg); 2041 sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code); 2042 2043 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 2044 * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register 2045 * 2046 * GL_NV_fragment_program_option could handle this in one instruction via X2D: 2047 * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw 2048 * 2049 * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that 2050 * comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV 2051 * extension. 2052 */ 2053 shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code); 2054 shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg); 2055 shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code); 2056 shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg); 2057 2058 /* with projective textures, texbem only divides the static texture coord, not the displacement, 2059 * so we can't let the GL handle this. 
2060 */ 2061 if ((priv->cur_ps_args->super.tex_transform >> reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT) 2062 & WINED3D_PSARGS_PROJECTED) 2063 { 2064 shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord); 2065 shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord); 2066 shader_addline(buffer, "ADD TA.xy, TA, TB;\n"); 2067 } else { 2068 shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord); 2069 } 2070 2071 shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", 0, NULL, NULL); 2072 2073 if (ins->handler_idx == WINED3DSIH_TEXBEML) 2074 { 2075 /* No src swizzles are allowed, so this is ok */ 2076 shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n", 2077 src_reg, reg_dest_code, reg_dest_code); 2078 shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg); 2079 } 2080 } 2081 2082 static void pshader_hw_texm3x2pad(const struct wined3d_shader_instruction *ins) 2083 { 2084 DWORD reg = ins->dst[0].reg.idx; 2085 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2086 char src0_name[50], dst_name[50]; 2087 BOOL is_color; 2088 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 2089 2090 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2091 /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized 2092 * T<reg+1> register. 
Use this register to store the calculated vector 2093 */ 2094 tmp_reg.idx = reg + 1; 2095 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 2096 shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 2097 } 2098 2099 static void pshader_hw_texm3x2tex(const struct wined3d_shader_instruction *ins) 2100 { 2101 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader; 2102 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device; 2103 DWORD flags; 2104 DWORD reg = ins->dst[0].reg.idx; 2105 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2106 char dst_str[50]; 2107 char src0_name[50]; 2108 char dst_reg[50]; 2109 BOOL is_color; 2110 2111 /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */ 2112 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2113 2114 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2115 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2116 shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2117 flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->state.texture_states[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0; 2118 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2119 } 2120 2121 static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins) 2122 { 2123 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader; 2124 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state; 2125 DWORD reg = ins->dst[0].reg.idx; 2126 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2127 char src0_name[50], dst_name[50]; 2128 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 2129 BOOL is_color; 2130 2131 /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] 
instruction, with 2132 * incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination 2133 * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared') 2134 */ 2135 tmp_reg.idx = reg + 2 - current_state->current_row; 2136 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 2137 2138 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2139 shader_addline(buffer, "DP3 %s.%c, fragment.texcoord[%u], %s;\n", 2140 dst_name, 'x' + current_state->current_row, reg, src0_name); 2141 current_state->texcoord_w[current_state->current_row++] = reg; 2142 } 2143 2144 static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins) 2145 { 2146 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader; 2147 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device; 2148 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state; 2149 DWORD flags; 2150 DWORD reg = ins->dst[0].reg.idx; 2151 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2152 char dst_str[50]; 2153 char src0_name[50], dst_name[50]; 2154 BOOL is_color; 2155 2156 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2157 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2158 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 2159 2160 /* Sample the texture using the calculated coordinates */ 2161 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2162 flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->state.texture_states[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0; 2163 shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3DTTFF_PROJECTED ? 
TEX_PROJ : 0, NULL, NULL); 2164 current_state->current_row = 0; 2165 } 2166 2167 static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins) 2168 { 2169 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader; 2170 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device; 2171 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state; 2172 DWORD flags; 2173 DWORD reg = ins->dst[0].reg.idx; 2174 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2175 char dst_str[50]; 2176 char src0_name[50]; 2177 char dst_reg[50]; 2178 BOOL is_color; 2179 2180 /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all 2181 * components for temporary data storage 2182 */ 2183 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2184 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2185 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2186 2187 /* Construct the eye-ray vector from w coordinates */ 2188 shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", current_state->texcoord_w[0]); 2189 shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", current_state->texcoord_w[1]); 2190 shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg); 2191 2192 /* Calculate reflection vector 2193 */ 2194 shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg); 2195 /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */ 2196 shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg); 2197 shader_addline(buffer, "RCP TB.w, TB.w;\n"); 2198 shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg); 2199 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 2200 shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg); 2201 2202 /* Sample the texture using the calculated coordinates */ 2203 
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2204 flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->state.texture_states[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0; 2205 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2206 current_state->current_row = 0; 2207 } 2208 2209 static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins) 2210 { 2211 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader; 2212 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device; 2213 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state; 2214 DWORD flags; 2215 DWORD reg = ins->dst[0].reg.idx; 2216 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2217 char dst_str[50]; 2218 char src0_name[50]; 2219 char src1_name[50]; 2220 char dst_reg[50]; 2221 BOOL is_color; 2222 2223 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 2224 shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name); 2225 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 2226 /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */ 2227 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 2228 2229 /* Calculate reflection vector. 
2230 * 2231 * dot(N, E) 2232 * dst_reg.xyz = 2 * --------- * N - E 2233 * dot(N, N) 2234 * 2235 * Which normalizes the normal vector 2236 */ 2237 shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name); 2238 shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg); 2239 shader_addline(buffer, "RCP TC.w, TC.w;\n"); 2240 shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg); 2241 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 2242 shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name); 2243 2244 /* Sample the texture using the calculated coordinates */ 2245 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2246 flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->state.texture_states[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0; 2247 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 2248 current_state->current_row = 0; 2249 } 2250 2251 static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins) 2252 { 2253 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2254 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2255 char dst_name[50]; 2256 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2257 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2258 2259 /* texdepth has an implicit destination, the fragment depth value. It's only parameter, 2260 * which is essentially an input, is the destination register because it is the first 2261 * parameter. 
According to the msdn, this must be register r5, but let's keep it more flexible 2262 * here(writemasks/swizzles are not valid on texdepth) 2263 */ 2264 shader_arb_get_dst_param(ins, dst, dst_name); 2265 2266 /* According to the msdn, the source register(must be r5) is unusable after 2267 * the texdepth instruction, so we're free to modify it 2268 */ 2269 shader_addline(buffer, "MIN %s.y, %s.y, %s;\n", dst_name, dst_name, one); 2270 2271 /* How to deal with the special case dst_name.g == 0? if r != 0, then 2272 * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct 2273 * result. But if r = 0.0, then 0 * inf = 0, which is incorrect. 2274 */ 2275 shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name); 2276 shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name); 2277 shader_addline(buffer, "MIN TA.x, TA.x, %s;\n", one); 2278 shader_addline(buffer, "MAX result.depth, TA.x, %s;\n", zero); 2279 } 2280 2281 /** Process the WINED3DSIO_TEXDP3TEX instruction in ARB: 2282 * Take a 3-component dot product of the TexCoord[dstreg] and src, 2283 * then perform a 1D texture lookup from stage dstregnum, place into dst. */ 2284 static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins) 2285 { 2286 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2287 DWORD sampler_idx = ins->dst[0].reg.idx; 2288 char src0[50]; 2289 char dst_str[50]; 2290 2291 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2292 shader_addline(buffer, "MOV TB, 0.0;\n"); 2293 shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0); 2294 2295 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 2296 shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL); 2297 } 2298 2299 /** Process the WINED3DSIO_TEXDP3 instruction in ARB: 2300 * Take a 3-component dot product of the TexCoord[dstreg] and src. 
*/ 2301 static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins) 2302 { 2303 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2304 char src0[50]; 2305 char dst_str[50]; 2306 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2307 2308 /* Handle output register */ 2309 shader_arb_get_dst_param(ins, dst, dst_str); 2310 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2311 shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx, src0); 2312 } 2313 2314 /** Process the WINED3DSIO_TEXM3X3 instruction in ARB 2315 * Perform the 3rd row of a 3x3 matrix multiply */ 2316 static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins) 2317 { 2318 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2319 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2320 char dst_str[50], dst_name[50]; 2321 char src0[50]; 2322 BOOL is_color; 2323 2324 shader_arb_get_dst_param(ins, dst, dst_str); 2325 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2326 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2327 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx, src0); 2328 shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name); 2329 } 2330 2331 /** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB: 2332 * Last row of a 3x2 matrix multiply, use the result to calculate the depth: 2333 * Calculate tmp0.y = TexCoord[dstreg] . src.xyz; (tmp0.x has already been calculated) 2334 * depth = (tmp0.y == 0.0) ? 
1.0 : tmp0.x / tmp0.y 2335 */ 2336 static void pshader_hw_texm3x2depth(const struct wined3d_shader_instruction *ins) 2337 { 2338 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2339 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 2340 char src0[50], dst_name[50]; 2341 BOOL is_color; 2342 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2343 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2344 2345 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 2346 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2347 shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx, src0); 2348 2349 /* How to deal with the special case dst_name.g == 0? if r != 0, then 2350 * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct 2351 * result. But if r = 0.0, then 0 * inf = 0, which is incorrect. 2352 */ 2353 shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name); 2354 shader_addline(buffer, "MUL %s.x, %s.x, %s.y;\n", dst_name, dst_name, dst_name); 2355 shader_addline(buffer, "MIN %s.x, %s.x, %s;\n", dst_name, dst_name, one); 2356 shader_addline(buffer, "MAX result.depth, %s.x, %s;\n", dst_name, zero); 2357 } 2358 2359 /** Handles transforming all WINED3DSIO_M?x? 
opcodes for 2360 Vertex/Pixel shaders to ARB_vertex_program codes */ 2361 static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins) 2362 { 2363 int i; 2364 int nComponents = 0; 2365 struct wined3d_shader_dst_param tmp_dst = {{0}}; 2366 struct wined3d_shader_src_param tmp_src[2] = {{{0}}}; 2367 struct wined3d_shader_instruction tmp_ins; 2368 2369 memset(&tmp_ins, 0, sizeof(tmp_ins)); 2370 2371 /* Set constants for the temporary argument */ 2372 tmp_ins.ctx = ins->ctx; 2373 tmp_ins.dst_count = 1; 2374 tmp_ins.dst = &tmp_dst; 2375 tmp_ins.src_count = 2; 2376 tmp_ins.src = tmp_src; 2377 2378 switch(ins->handler_idx) 2379 { 2380 case WINED3DSIH_M4x4: 2381 nComponents = 4; 2382 tmp_ins.handler_idx = WINED3DSIH_DP4; 2383 break; 2384 case WINED3DSIH_M4x3: 2385 nComponents = 3; 2386 tmp_ins.handler_idx = WINED3DSIH_DP4; 2387 break; 2388 case WINED3DSIH_M3x4: 2389 nComponents = 4; 2390 tmp_ins.handler_idx = WINED3DSIH_DP3; 2391 break; 2392 case WINED3DSIH_M3x3: 2393 nComponents = 3; 2394 tmp_ins.handler_idx = WINED3DSIH_DP3; 2395 break; 2396 case WINED3DSIH_M3x2: 2397 nComponents = 2; 2398 tmp_ins.handler_idx = WINED3DSIH_DP3; 2399 break; 2400 default: 2401 FIXME("Unhandled opcode %#x\n", ins->handler_idx); 2402 break; 2403 } 2404 2405 tmp_dst = ins->dst[0]; 2406 tmp_src[0] = ins->src[0]; 2407 tmp_src[1] = ins->src[1]; 2408 for (i = 0; i < nComponents; i++) { 2409 tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i; 2410 shader_hw_map2gl(&tmp_ins); 2411 ++tmp_src[1].reg.idx; 2412 } 2413 } 2414 2415 static void shader_hw_rcp(const struct wined3d_shader_instruction *ins) 2416 { 2417 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2418 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2419 const char *flt_eps = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_EPS); 2420 2421 char dst[50]; 2422 char src[50]; 2423 2424 shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */ 2425 shader_arb_get_src_param(ins, &ins->src[0], 
0, src); 2426 if (ins->src[0].swizzle == WINED3DSP_NOSWIZZLE) 2427 { 2428 /* Dx sdk says .x is used if no swizzle is given, but our test shows that 2429 * .w is used 2430 */ 2431 strcat(src, ".w"); 2432 } 2433 2434 /* TODO: If the destination is readable, and not the same as the source, the destination 2435 * can be used instead of TA 2436 */ 2437 if (priv->target_version >= NV2) 2438 { 2439 shader_addline(buffer, "MOVC TA.x, %s;\n", src); 2440 shader_addline(buffer, "MOV TA.x (EQ.x), %s;\n", flt_eps); 2441 shader_addline(buffer, "RCP%s %s, TA.x;\n", shader_arb_get_modifier(ins), dst); 2442 } 2443 else 2444 { 2445 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2446 shader_addline(buffer, "ABS TA.x, %s;\n", src); 2447 shader_addline(buffer, "SGE TA.y, -TA.x, %s;\n", zero); 2448 shader_addline(buffer, "MAD TA.x, TA.y, %s, %s;\n", flt_eps, src); 2449 shader_addline(buffer, "RCP%s %s, TA.x;\n", shader_arb_get_modifier(ins), dst); 2450 } 2451 } 2452 2453 static void shader_hw_scalar_op(const struct wined3d_shader_instruction *ins) 2454 { 2455 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2456 const char *instruction; 2457 2458 char dst[50]; 2459 char src[50]; 2460 2461 switch(ins->handler_idx) 2462 { 2463 case WINED3DSIH_RSQ: instruction = "RSQ"; break; 2464 case WINED3DSIH_RCP: instruction = "RCP"; break; 2465 case WINED3DSIH_EXP: instruction = "EX2"; break; 2466 case WINED3DSIH_EXPP: instruction = "EXP"; break; 2467 default: instruction = ""; 2468 FIXME("Unhandled opcode %#x\n", ins->handler_idx); 2469 break; 2470 } 2471 2472 shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */ 2473 shader_arb_get_src_param(ins, &ins->src[0], 0, src); 2474 if (ins->src[0].swizzle == WINED3DSP_NOSWIZZLE) 2475 { 2476 /* Dx sdk says .x is used if no swizzle is given, but our test shows that 2477 * .w is used 2478 */ 2479 strcat(src, ".w"); 2480 } 2481 2482 shader_addline(buffer, "%s%s %s, %s;\n", instruction, 
shader_arb_get_modifier(ins), dst, src); 2483 } 2484 2485 static void shader_hw_nrm(const struct wined3d_shader_instruction *ins) 2486 { 2487 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2488 char dst_name[50]; 2489 char src_name[50]; 2490 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2491 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 2492 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 2493 2494 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 2495 shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name); 2496 2497 /* In D3D, NRM of a vector with length zero returns zero. Catch this situation, as 2498 * otherwise NRM or RSQ would return NaN */ 2499 if(pshader && priv->target_version >= NV3) 2500 { 2501 /* GL_NV_fragment_program2's NRM needs protection against length zero vectors too 2502 * 2503 * TODO: Find out if DP3+NRM+MOV is really faster than DP3+RSQ+MUL 2504 */ 2505 shader_addline(buffer, "DP3C TA, %s, %s;\n", src_name, src_name); 2506 shader_addline(buffer, "NRM%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name); 2507 shader_addline(buffer, "MOV %s (EQ), %s;\n", dst_name, zero); 2508 } 2509 else if(priv->target_version >= NV2) 2510 { 2511 shader_addline(buffer, "DP3C TA.x, %s, %s;\n", src_name, src_name); 2512 shader_addline(buffer, "RSQ TA.x (NE), TA.x;\n"); 2513 shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name, 2514 src_name); 2515 } 2516 else 2517 { 2518 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 2519 2520 shader_addline(buffer, "DP3 TA.x, %s, %s;\n", src_name, src_name); 2521 /* Pass any non-zero value to RSQ if the input vector has a length of zero. The 2522 * RSQ result doesn't matter, as long as multiplying it by 0 returns 0. 
/* Handler for LRP.
 *
 * Pixel shaders are forwarded to shader_hw_map2gl(), since ARB_fragment_program
 * has a native LRP. For every other shader type the instruction is expanded to
 *     TA  = src1 - src2
 *     dst = src0 * TA + src2
 * using the TA temporary; the destination modifier is applied to the MAD only.
 */
static void shader_hw_lrp(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
    char dst_name[50];
    char src_name[3][50];

    /* ARB_fragment_program has a convenient LRP instruction */
    if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
        shader_hw_map2gl(ins);
        return;
    }

    shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
    shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
    shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);

    shader_addline(buffer, "SUB TA, %s, %s;\n", src_name[1], src_name[2]);
    shader_addline(buffer, "MAD%s %s, %s, TA, %s;\n", shader_arb_get_modifier(ins),
            dst_name, src_name[0], src_name[2]);
}

/* Handler for SINCOS. Three code paths are generated:
 *  - pixel shaders: a single SCS, followed by a MOV_SAT if the destination
 *    carries the saturate modifier;
 *  - target_version >= NV2: COS into dst.x and/or SIN into dst.y, depending
 *    on the write mask;
 *  - otherwise: a Taylor series evaluated with MUL/MAD, using the constants
 *    passed in src1 and src2 and TA plus dst.xyz as scratch space.
 */
static void shader_hw_sincos(const struct wined3d_shader_instruction *ins)
{
    /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which
     * must contain fixed constants. So we need a separate function to filter those constants and
     * can't use map2gl
     */
    struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
    char dst_name[50];
    char src_name0[50], src_name1[50], src_name2[50];
    BOOL is_color;

    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
    if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
        shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
        /* No modifiers are supported on SCS */
        shader_addline(buffer, "SCS %s, %s;\n", dst_name, src_name0);

        if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE)
        {
            /* src_name0 is no longer needed as the source operand; reuse the
             * buffer for the destination register name and saturate the SCS
             * result in place. */
            shader_arb_get_register_name(ins, &dst->reg, src_name0, &is_color);
            shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name0);
        }
    } else if(priv->target_version >= NV2) {
        shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);

        /* Sincos writemask must be .x, .y or .xy */
        if(dst->write_mask & WINED3DSP_WRITEMASK_0)
            shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
        if(dst->write_mask & WINED3DSP_WRITEMASK_1)
            shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
    } else {
        /* Approximate sine and cosine with a Taylor series, as per math textbook. The application
         * passes 8 helper constants (D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2.
         *
         * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
         * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ...
         *
         * The constants, in the components the code below reads them from, are:
         *
         *   src2.z = +1.0            src2.w = +1 / (1! * 2)
         *   src2.y = -1 / (2! * 4)   src2.x = -1 / (3! * 8)
         *   src1.z = +1 / (4! * 16)  src1.w = +1 / (5! * 32)
         *   src1.y = -1 / (6! * 64)  src1.x = -1 / (7! * 128)
         *
         * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2):
         *
         * (x/2)^2 = x^2 / 4
         * (x/2)^3 = x^3 / 8
         * (x/2)^4 = x^4 / 16
         * (x/2)^5 = x^5 / 32
         * etc
         *
         * To get the final result:
         * sin(x) = 2 * sin(x/2) * cos(x/2)
         * cos(x) = cos(x/2)^2 - sin(x/2)^2
         * (from sin(x+y) and cos(x+y) rules)
         *
         * As per MSDN, dst.z is undefined after the operation, and so is
         * dst.x and dst.y if they're masked out by the writemask. Ie
         * sincos dst.y, src1, c0, c1
         * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler
         * vsa.exe also stops with an error if the dest register is the same register as the source
         * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also
         * indicates that sincos consumes 8 instruction slots in vs_2_0 (and, strangely, in vs_3_0).
         */
        shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
        shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2);
        shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);

        /* Build the powers of x. Even powers go to dst.xyz, odd powers to
         * TA.yzw; each step multiplies the previous power by x. */
        shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0);  /* x ^ 2 */
        shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0);           /* x ^ 3 */
        shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0);           /* x ^ 4 */
        shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0);           /* x ^ 5 */
        shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0);           /* x ^ 6 */
        shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0);           /* x ^ 7 */

        /* sin(x/2), accumulated in TA.x
         *
         * Unfortunately we don't get the constants in a DP4-capable form. Is there a way to
         * properly merge that with MULs in the code above?
         * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe
         * we can merge the sine and cosine MAD rows to calculate them together.
         */
        shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2);  /* x^1, +1/(1!*2) */
        shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2);     /* -1/(3!*8) */
        shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1);     /* +1/(5!*32) */
        shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1);     /* -1/(7!*128) */

        /* cos(x/2), accumulated in TA.y. This overwrites x^3, which the sine
         * row above has already consumed. */
        shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2);  /* -1/(2!*4), +1.0 */
        shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1);             /* +1/(4!*16) */
        shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1);             /* -1/(6!*64) */

        if(dst->write_mask & WINED3DSP_WRITEMASK_0) {
            /* cos x = cos(x/2)^2 - sin(x/2)^2 */
            shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n");
            shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name);
        }
        if(dst->write_mask & WINED3DSP_WRITEMASK_1) {
            /* sin x = 2 * sin(x/2) * cos(x/2) */
            shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name);
            shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name);
        }
    }
}
2677 } 2678 2679 /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false. 2680 * if SRC < 0.0, SRC < -SRC = TRUE. If neither is true, src = 0.0 2681 */ 2682 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) { 2683 shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name); 2684 } else { 2685 /* src contains TA? Write to the dest first. This won't overwrite our destination. 2686 * Then use TA, and calculate the final result 2687 * 2688 * Not reading from TA? Store the first result in TA to avoid overwriting the 2689 * destination if src reg = dst reg 2690 */ 2691 if(strstr(src_name, "TA")) 2692 { 2693 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 2694 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 2695 shader_addline(buffer, "ADD %s, %s, -TA;\n", dst_name, dst_name); 2696 } 2697 else 2698 { 2699 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 2700 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 2701 shader_addline(buffer, "ADD %s, TA, -%s;\n", dst_name, dst_name); 2702 } 2703 } 2704 } 2705 2706 static void shader_hw_dsy(const struct wined3d_shader_instruction *ins) 2707 { 2708 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2709 char src[50]; 2710 char dst[50]; 2711 char dst_name[50]; 2712 BOOL is_color; 2713 2714 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 2715 shader_arb_get_src_param(ins, &ins->src[0], 0, src); 2716 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 2717 2718 shader_addline(buffer, "DDY %s, %s;\n", dst, src); 2719 shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst, dst_name); 2720 } 2721 2722 static DWORD abs_modifier(DWORD mod, BOOL *need_abs) 2723 { 2724 *need_abs = FALSE; 2725 2726 switch(mod) 2727 { 2728 case WINED3DSPSM_NONE: return WINED3DSPSM_ABS; 2729 case WINED3DSPSM_NEG: return WINED3DSPSM_ABS; 2730 case WINED3DSPSM_BIAS: *need_abs = TRUE; return 
WINED3DSPSM_BIAS; 2731 case WINED3DSPSM_BIASNEG: *need_abs = TRUE; return WINED3DSPSM_BIASNEG; 2732 case WINED3DSPSM_SIGN: *need_abs = TRUE; return WINED3DSPSM_SIGN; 2733 case WINED3DSPSM_SIGNNEG: *need_abs = TRUE; return WINED3DSPSM_SIGNNEG; 2734 case WINED3DSPSM_COMP: *need_abs = TRUE; return WINED3DSPSM_COMP; 2735 case WINED3DSPSM_X2: *need_abs = TRUE; return WINED3DSPSM_X2; 2736 case WINED3DSPSM_X2NEG: *need_abs = TRUE; return WINED3DSPSM_X2NEG; 2737 case WINED3DSPSM_DZ: *need_abs = TRUE; return WINED3DSPSM_DZ; 2738 case WINED3DSPSM_DW: *need_abs = TRUE; return WINED3DSPSM_DW; 2739 case WINED3DSPSM_ABS: return WINED3DSPSM_ABS; 2740 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 2741 } 2742 FIXME("Unknown modifier %u\n", mod); 2743 return mod; 2744 } 2745 2746 static void shader_hw_log_pow(const struct wined3d_shader_instruction *ins) 2747 { 2748 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2749 char src0[50], src1[50], dst[50]; 2750 struct wined3d_shader_src_param src0_copy = ins->src[0]; 2751 BOOL need_abs = FALSE; 2752 const char *instr; 2753 BOOL arg2 = FALSE; 2754 2755 switch(ins->handler_idx) 2756 { 2757 case WINED3DSIH_LOG: instr = "LG2"; break; 2758 case WINED3DSIH_LOGP: instr = "LOG"; break; 2759 case WINED3DSIH_POW: instr = "POW"; arg2 = TRUE; break; 2760 default: 2761 ERR("Unexpected instruction %d\n", ins->handler_idx); 2762 return; 2763 } 2764 2765 /* LOG, LOGP and POW operate on the absolute value of the input */ 2766 src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs); 2767 2768 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 2769 shader_arb_get_src_param(ins, &src0_copy, 0, src0); 2770 if(arg2) shader_arb_get_src_param(ins, &ins->src[1], 1, src1); 2771 2772 if(need_abs) 2773 { 2774 shader_addline(buffer, "ABS TA, %s;\n", src0); 2775 if(arg2) 2776 { 2777 shader_addline(buffer, "%s%s %s, TA, %s;\n", instr, shader_arb_get_modifier(ins), dst, src1); 2778 } 2779 else 2780 { 2781 shader_addline(buffer, "%s%s %s, TA;\n", 
instr, shader_arb_get_modifier(ins), dst); 2782 } 2783 } 2784 else if(arg2) 2785 { 2786 shader_addline(buffer, "%s%s %s, %s, %s;\n", instr, shader_arb_get_modifier(ins), dst, src0, src1); 2787 } 2788 else 2789 { 2790 shader_addline(buffer, "%s%s %s, %s;\n", instr, shader_arb_get_modifier(ins), dst, src0); 2791 } 2792 } 2793 2794 static void shader_hw_loop(const struct wined3d_shader_instruction *ins) 2795 { 2796 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2797 char src_name[50]; 2798 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2799 2800 /* src0 is aL */ 2801 shader_arb_get_src_param(ins, &ins->src[1], 0, src_name); 2802 2803 if(vshader) 2804 { 2805 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2806 struct list *e = list_head(&priv->control_frames); 2807 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2808 2809 if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 2810 /* The constant loader makes sure to load -1 into iX.w */ 2811 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 2812 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 2813 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 2814 } 2815 else 2816 { 2817 shader_addline(buffer, "LOOP %s;\n", src_name); 2818 } 2819 } 2820 2821 static void shader_hw_rep(const struct wined3d_shader_instruction *ins) 2822 { 2823 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2824 char src_name[50]; 2825 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2826 2827 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name); 2828 2829 /* The constant loader makes sure to load -1 into iX.w */ 2830 if(vshader) 2831 { 2832 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2833 struct list *e = list_head(&priv->control_frames); 2834 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2835 2836 
if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 2837 2838 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 2839 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 2840 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 2841 } 2842 else 2843 { 2844 shader_addline(buffer, "REP %s;\n", src_name); 2845 } 2846 } 2847 2848 static void shader_hw_endloop(const struct wined3d_shader_instruction *ins) 2849 { 2850 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2851 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2852 2853 if(vshader) 2854 { 2855 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2856 struct list *e = list_head(&priv->control_frames); 2857 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2858 2859 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 2860 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 2861 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 2862 2863 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 2864 } 2865 else 2866 { 2867 shader_addline(buffer, "ENDLOOP;\n"); 2868 } 2869 } 2870 2871 static void shader_hw_endrep(const struct wined3d_shader_instruction *ins) 2872 { 2873 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2874 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2875 2876 if(vshader) 2877 { 2878 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2879 struct list *e = list_head(&priv->control_frames); 2880 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2881 2882 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 2883 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 2884 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 2885 2886 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 2887 } 2888 else 
2889 { 2890 shader_addline(buffer, "ENDREP;\n"); 2891 } 2892 } 2893 2894 static const struct control_frame *find_last_loop(const struct shader_arb_ctx_priv *priv) 2895 { 2896 struct control_frame *control_frame; 2897 2898 LIST_FOR_EACH_ENTRY(control_frame, &priv->control_frames, struct control_frame, entry) 2899 { 2900 if(control_frame->type == LOOP || control_frame->type == REP) return control_frame; 2901 } 2902 ERR("Could not find loop for break\n"); 2903 return NULL; 2904 } 2905 2906 static void shader_hw_break(const struct wined3d_shader_instruction *ins) 2907 { 2908 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2909 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 2910 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2911 2912 if(vshader) 2913 { 2914 shader_addline(buffer, "BRA loop_%u_end;\n", control_frame->no.loop); 2915 } 2916 else 2917 { 2918 shader_addline(buffer, "BRK;\n"); 2919 } 2920 } 2921 2922 static const char *get_compare(COMPARISON_TYPE flags) 2923 { 2924 switch (flags) 2925 { 2926 case COMPARISON_GT: return "GT"; 2927 case COMPARISON_EQ: return "EQ"; 2928 case COMPARISON_GE: return "GE"; 2929 case COMPARISON_LT: return "LT"; 2930 case COMPARISON_NE: return "NE"; 2931 case COMPARISON_LE: return "LE"; 2932 default: 2933 FIXME("Unrecognized comparison value: %u\n", flags); 2934 return "(\?\?)"; 2935 } 2936 } 2937 2938 static COMPARISON_TYPE invert_compare(COMPARISON_TYPE flags) 2939 { 2940 switch (flags) 2941 { 2942 case COMPARISON_GT: return COMPARISON_LE; 2943 case COMPARISON_EQ: return COMPARISON_NE; 2944 case COMPARISON_GE: return COMPARISON_LT; 2945 case COMPARISON_LT: return COMPARISON_GE; 2946 case COMPARISON_NE: return COMPARISON_EQ; 2947 case COMPARISON_LE: return COMPARISON_GT; 2948 default: 2949 FIXME("Unrecognized comparison value: %u\n", flags); 2950 return -1; 2951 } 2952 } 2953 2954 static void shader_hw_breakc(const struct wined3d_shader_instruction 
*ins) 2955 { 2956 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2957 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2958 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 2959 char src_name0[50]; 2960 char src_name1[50]; 2961 const char *comp = get_compare(ins->flags); 2962 2963 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 2964 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 2965 2966 if(vshader) 2967 { 2968 /* SUBC CC, src0, src1" works only in pixel shaders, so use TA to throw 2969 * away the subtraction result 2970 */ 2971 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 2972 shader_addline(buffer, "BRA loop_%u_end (%s.x);\n", control_frame->no.loop, comp); 2973 } 2974 else 2975 { 2976 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 2977 shader_addline(buffer, "BRK (%s.x);\n", comp); 2978 } 2979 } 2980 2981 static void shader_hw_ifc(const struct wined3d_shader_instruction *ins) 2982 { 2983 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 2984 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 2985 struct list *e = list_head(&priv->control_frames); 2986 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 2987 const char *comp; 2988 char src_name0[50]; 2989 char src_name1[50]; 2990 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 2991 2992 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 2993 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 2994 2995 if(vshader) 2996 { 2997 /* Invert the flag. 
We jump to the else label if the condition is NOT true */ 2998 comp = get_compare(invert_compare(ins->flags)); 2999 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3000 shader_addline(buffer, "BRA ifc_%u_else (%s.x);\n", control_frame->no.ifc, comp); 3001 } 3002 else 3003 { 3004 comp = get_compare(ins->flags); 3005 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 3006 shader_addline(buffer, "IF %s.x;\n", comp); 3007 } 3008 } 3009 3010 static void shader_hw_else(const struct wined3d_shader_instruction *ins) 3011 { 3012 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 3013 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3014 struct list *e = list_head(&priv->control_frames); 3015 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3016 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3017 3018 if(vshader) 3019 { 3020 shader_addline(buffer, "BRA ifc_%u_endif;\n", control_frame->no.ifc); 3021 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 3022 control_frame->had_else = TRUE; 3023 } 3024 else 3025 { 3026 shader_addline(buffer, "ELSE;\n"); 3027 } 3028 } 3029 3030 static void shader_hw_endif(const struct wined3d_shader_instruction *ins) 3031 { 3032 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 3033 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3034 struct list *e = list_head(&priv->control_frames); 3035 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 3036 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3037 3038 if(vshader) 3039 { 3040 if(control_frame->had_else) 3041 { 3042 shader_addline(buffer, "ifc_%u_endif:\n", control_frame->no.ifc); 3043 } 3044 else 3045 { 3046 shader_addline(buffer, "#No else branch. 
else is endif\n"); 3047 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 3048 } 3049 } 3050 else 3051 { 3052 shader_addline(buffer, "ENDIF;\n"); 3053 } 3054 } 3055 3056 static void shader_hw_texldd(const struct wined3d_shader_instruction *ins) 3057 { 3058 DWORD sampler_idx = ins->src[1].reg.idx; 3059 char reg_dest[40]; 3060 char reg_src[3][40]; 3061 WORD flags = TEX_DERIV; 3062 3063 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 3064 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src[0]); 3065 shader_arb_get_src_param(ins, &ins->src[2], 1, reg_src[1]); 3066 shader_arb_get_src_param(ins, &ins->src[3], 2, reg_src[2]); 3067 3068 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 3069 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 3070 3071 shader_hw_sample(ins, sampler_idx, reg_dest, reg_src[0], flags, reg_src[1], reg_src[2]); 3072 } 3073 3074 static void shader_hw_texldl(const struct wined3d_shader_instruction *ins) 3075 { 3076 DWORD sampler_idx = ins->src[1].reg.idx; 3077 char reg_dest[40]; 3078 char reg_coord[40]; 3079 WORD flags = TEX_LOD; 3080 3081 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 3082 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_coord); 3083 3084 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 3085 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 3086 3087 shader_hw_sample(ins, sampler_idx, reg_dest, reg_coord, flags, NULL, NULL); 3088 } 3089 3090 static void shader_hw_label(const struct wined3d_shader_instruction *ins) 3091 { 3092 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 3093 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3094 3095 priv->in_main_func = FALSE; 3096 /* Call instructions activate the NV extensions, not labels and rets. 
If there is an uncalled 3097 * subroutine, don't generate a label that will make GL complain 3098 */ 3099 if(priv->target_version == ARB) return; 3100 3101 shader_addline(buffer, "l%u:\n", ins->src[0].reg.idx); 3102 } 3103 3104 static void vshader_add_footer(IWineD3DVertexShaderImpl *This, struct wined3d_shader_buffer *buffer, 3105 const struct arb_vs_compile_args *args, struct shader_arb_ctx_priv *priv_ctx) 3106 { 3107 const shader_reg_maps *reg_maps = &This->baseShader.reg_maps; 3108 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device; 3109 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 3110 unsigned int i; 3111 3112 /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used 3113 * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE), 3114 * the fog frag coord is thrown away. If the fog frag coord is used, but not written by 3115 * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0) 3116 */ 3117 if(args->super.fog_src == VS_FOG_Z) { 3118 shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n"); 3119 } else if (!reg_maps->fog) { 3120 /* posFixup.x is always 1.0, so we can savely use it */ 3121 shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n"); 3122 } 3123 3124 /* Write the final position. 3125 * 3126 * OpenGL coordinates specify the center of the pixel while d3d coords specify 3127 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains 3128 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x 3129 * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that. 
3130 */ 3131 shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n"); 3132 shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n"); 3133 shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n"); 3134 3135 if(use_nv_clip(gl_info) && priv_ctx->target_version >= NV2) 3136 { 3137 if(args->super.clip_enabled) 3138 { 3139 for(i = 0; i < priv_ctx->vs_clipplanes; i++) 3140 { 3141 shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i); 3142 } 3143 } 3144 } 3145 else if(args->clip.boolclip.clip_texcoord) 3146 { 3147 unsigned int cur_clip = 0; 3148 char component[4] = {'x', 'y', 'z', 'w'}; 3149 const char *zero = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ZERO); 3150 3151 for (i = 0; i < gl_info->limits.clipplanes; ++i) 3152 { 3153 if(args->clip.boolclip.clipplane_mask & (1 << i)) 3154 { 3155 shader_addline(buffer, "DP4 TA.%c, TMP_OUT, state.clip[%u].plane;\n", 3156 component[cur_clip++], i); 3157 } 3158 } 3159 switch(cur_clip) 3160 { 3161 case 0: 3162 shader_addline(buffer, "MOV TA, %s;\n", zero); 3163 break; 3164 case 1: 3165 shader_addline(buffer, "MOV TA.yzw, %s;\n", zero); 3166 break; 3167 case 2: 3168 shader_addline(buffer, "MOV TA.zw, %s;\n", zero); 3169 break; 3170 case 3: 3171 shader_addline(buffer, "MOV TA.w, %s;\n", zero); 3172 break; 3173 } 3174 shader_addline(buffer, "MOV result.texcoord[%u], TA;\n", 3175 args->clip.boolclip.clip_texcoord - 1); 3176 } 3177 3178 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c 3179 * and the glsl equivalent 3180 */ 3181 if(need_helper_const((IWineD3DBaseShaderImpl *) This, gl_info)) { 3182 const char *two = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_TWO); 3183 shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, %s, -TMP_OUT.w;\n", two); 3184 } else { 3185 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n"); 3186 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n"); 3187 } 3188 3189 shader_addline(buffer, 
"MOV result.position, TMP_OUT;\n"); 3190 3191 priv_ctx->footer_written = TRUE; 3192 } 3193 3194 static void shader_hw_ret(const struct wined3d_shader_instruction *ins) 3195 { 3196 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 3197 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 3198 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *) ins->ctx->shader; 3199 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 3200 3201 if(priv->target_version == ARB) return; 3202 3203 if(vshader) 3204 { 3205 if(priv->in_main_func) vshader_add_footer((IWineD3DVertexShaderImpl *) shader, buffer, priv->cur_vs_args, priv); 3206 } 3207 3208 shader_addline(buffer, "RET;\n"); 3209 } 3210 3211 static void shader_hw_call(const struct wined3d_shader_instruction *ins) 3212 { 3213 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 3214 shader_addline(buffer, "CAL l%u;\n", ins->src[0].reg.idx); 3215 } 3216 3217 /* GL locking is done by the caller */ 3218 static GLuint create_arb_blt_vertex_program(const struct wined3d_gl_info *gl_info) 3219 { 3220 GLuint program_id = 0; 3221 GLint pos; 3222 3223 const char *blt_vprogram = 3224 "!!ARBvp1.0\n" 3225 "PARAM c[1] = { { 1, 0.5 } };\n" 3226 "MOV result.position, vertex.position;\n" 3227 "MOV result.color, c[0].x;\n" 3228 "MOV result.texcoord[0], vertex.texcoord[0];\n" 3229 "END\n"; 3230 3231 GL_EXTCALL(glGenProgramsARB(1, &program_id)); 3232 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, program_id)); 3233 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 3234 strlen(blt_vprogram), blt_vprogram)); 3235 checkGLcall("glProgramStringARB()"); 3236 3237 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 3238 if (pos != -1) 3239 { 3240 FIXME("Vertex program error at position %d: %s\n\n", pos, 3241 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 3242 shader_arb_dump_program_source(blt_vprogram); 3243 } 3244 else 3245 { 3246 GLint native; 
3247 3248 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 3249 checkGLcall("glGetProgramivARB()"); 3250 if (!native) WARN("Program exceeds native resource limits.\n"); 3251 } 3252 3253 return program_id; 3254 } 3255 3256 /* GL locking is done by the caller */ 3257 static GLuint create_arb_blt_fragment_program(const struct wined3d_gl_info *gl_info, 3258 enum tex_types tex_type, BOOL masked) 3259 { 3260 GLuint program_id = 0; 3261 const char *fprogram; 3262 GLint pos; 3263 3264 static const char * const blt_fprograms_full[tex_type_count] = 3265 { 3266 /* tex_1d */ 3267 NULL, 3268 /* tex_2d */ 3269 "!!ARBfp1.0\n" 3270 "TEMP R0;\n" 3271 "TEX R0.x, fragment.texcoord[0], texture[0], 2D;\n" 3272 "MOV result.depth.z, R0.x;\n" 3273 "END\n", 3274 /* tex_3d */ 3275 NULL, 3276 /* tex_cube */ 3277 "!!ARBfp1.0\n" 3278 "TEMP R0;\n" 3279 "TEX R0.x, fragment.texcoord[0], texture[0], CUBE;\n" 3280 "MOV result.depth.z, R0.x;\n" 3281 "END\n", 3282 /* tex_rect */ 3283 "!!ARBfp1.0\n" 3284 "TEMP R0;\n" 3285 "TEX R0.x, fragment.texcoord[0], texture[0], RECT;\n" 3286 "MOV result.depth.z, R0.x;\n" 3287 "END\n", 3288 }; 3289 3290 static const char * const blt_fprograms_masked[tex_type_count] = 3291 { 3292 /* tex_1d */ 3293 NULL, 3294 /* tex_2d */ 3295 "!!ARBfp1.0\n" 3296 "PARAM mask = program.local[0];\n" 3297 "TEMP R0;\n" 3298 "SLT R0.xy, fragment.position, mask.zwzw;\n" 3299 "MUL R0.x, R0.x, R0.y;\n" 3300 "KIL -R0.x;\n" 3301 "TEX R0.x, fragment.texcoord[0], texture[0], 2D;\n" 3302 "MOV result.depth.z, R0.x;\n" 3303 "END\n", 3304 /* tex_3d */ 3305 NULL, 3306 /* tex_cube */ 3307 "!!ARBfp1.0\n" 3308 "PARAM mask = program.local[0];\n" 3309 "TEMP R0;\n" 3310 "SLT R0.xy, fragment.position, mask.zwzw;\n" 3311 "MUL R0.x, R0.x, R0.y;\n" 3312 "KIL -R0.x;\n" 3313 "TEX R0.x, fragment.texcoord[0], texture[0], CUBE;\n" 3314 "MOV result.depth.z, R0.x;\n" 3315 "END\n", 3316 /* tex_rect */ 3317 "!!ARBfp1.0\n" 3318 "PARAM mask = program.local[0];\n" 3319 
"TEMP R0;\n" 3320 "SLT R0.xy, fragment.position, mask.zwzw;\n" 3321 "MUL R0.x, R0.x, R0.y;\n" 3322 "KIL -R0.x;\n" 3323 "TEX R0.x, fragment.texcoord[0], texture[0], RECT;\n" 3324 "MOV result.depth.z, R0.x;\n" 3325 "END\n", 3326 }; 3327 3328 fprogram = masked ? blt_fprograms_masked[tex_type] : blt_fprograms_full[tex_type]; 3329 if (!fprogram) 3330 { 3331 FIXME("tex_type %#x not supported, falling back to tex_2d\n", tex_type); 3332 tex_type = tex_2d; 3333 fprogram = masked ? blt_fprograms_masked[tex_type] : blt_fprograms_full[tex_type]; 3334 } 3335 3336 GL_EXTCALL(glGenProgramsARB(1, &program_id)); 3337 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, program_id)); 3338 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(fprogram), fprogram)); 3339 checkGLcall("glProgramStringARB()"); 3340 3341 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 3342 if (pos != -1) 3343 { 3344 FIXME("Fragment program error at position %d: %s\n\n", pos, 3345 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 3346 shader_arb_dump_program_source(fprogram); 3347 } 3348 else 3349 { 3350 GLint native; 3351 3352 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 3353 checkGLcall("glGetProgramivARB()"); 3354 if (!native) WARN("Program exceeds native resource limits.\n"); 3355 } 3356 3357 return program_id; 3358 } 3359 3360 static void arbfp_add_sRGB_correction(struct wined3d_shader_buffer *buffer, const char *fragcolor, 3361 const char *tmp1, const char *tmp2, const char *tmp3, const char *tmp4, BOOL condcode) 3362 { 3363 /* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */ 3364 3365 if(condcode) 3366 { 3367 /* Sigh. 
MOVC CC doesn't work, so use one of the temps as dummy dest */ 3368 shader_addline(buffer, "SUBC %s, %s.x, srgb_consts1.y;\n", tmp1, fragcolor); 3369 /* Calculate the > 0.0031308 case */ 3370 shader_addline(buffer, "POW %s.x (GE), %s.x, srgb_consts1.z;\n", fragcolor, fragcolor); 3371 shader_addline(buffer, "POW %s.y (GE), %s.y, srgb_consts1.z;\n", fragcolor, fragcolor); 3372 shader_addline(buffer, "POW %s.z (GE), %s.z, srgb_consts1.z;\n", fragcolor, fragcolor); 3373 shader_addline(buffer, "MUL %s.xyz (GE), %s, srgb_consts1.w;\n", fragcolor, fragcolor); 3374 shader_addline(buffer, "SUB %s.xyz (GE), %s, srgb_consts2.x;\n", fragcolor, fragcolor); 3375 /* Calculate the < case */ 3376 shader_addline(buffer, "MUL %s.xyz (LT), srgb_consts1.x, %s;\n", fragcolor, fragcolor); 3377 } 3378 else 3379 { 3380 /* Calculate the > 0.0031308 case */ 3381 shader_addline(buffer, "POW %s.x, %s.x, srgb_consts1.z;\n", tmp1, fragcolor); 3382 shader_addline(buffer, "POW %s.y, %s.y, srgb_consts1.z;\n", tmp1, fragcolor); 3383 shader_addline(buffer, "POW %s.z, %s.z, srgb_consts1.z;\n", tmp1, fragcolor); 3384 shader_addline(buffer, "MUL %s, %s, srgb_consts1.w;\n", tmp1, tmp1); 3385 shader_addline(buffer, "SUB %s, %s, srgb_consts2.x;\n", tmp1, tmp1); 3386 /* Calculate the < case */ 3387 shader_addline(buffer, "MUL %s, srgb_consts1.x, %s;\n", tmp2, fragcolor); 3388 /* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */ 3389 shader_addline(buffer, "SLT %s, srgb_consts1.y, %s;\n", tmp3, fragcolor); 3390 shader_addline(buffer, "SGE %s, srgb_consts1.y, %s;\n", tmp4, fragcolor); 3391 /* Store the components > 0.0031308 in the destination */ 3392 shader_addline(buffer, "MUL %s.xyz, %s, %s;\n", fragcolor, tmp1, tmp3); 3393 /* Add the components that are < 0.0031308 */ 3394 shader_addline(buffer, "MAD %s.xyz, %s, %s, %s;\n", fragcolor, tmp2, tmp4, fragcolor); 3395 /* Move everything into result.color at once. 
Nvidia hardware cannot handle partial 3396 * result.color writes(.rgb first, then .a), or handle overwriting already written 3397 * components. The assembler uses a temporary register in this case, which is usually 3398 * not allocated from one of our registers that were used earlier. 3399 */ 3400 } 3401 /* [0.0;1.0] clamping. Not needed, this is done implicitly */ 3402 } 3403 3404 static const DWORD *find_loop_control_values(IWineD3DBaseShaderImpl *This, DWORD idx) 3405 { 3406 const local_constant *constant; 3407 3408 LIST_FOR_EACH_ENTRY(constant, &This->baseShader.constantsI, local_constant, entry) 3409 { 3410 if (constant->idx == idx) 3411 { 3412 return constant->value; 3413 } 3414 } 3415 return NULL; 3416 } 3417 3418 static void init_ps_input(const IWineD3DPixelShaderImpl *This, const struct arb_ps_compile_args *args, 3419 struct shader_arb_ctx_priv *priv) 3420 { 3421 static const char * const texcoords[8] = 3422 { 3423 "fragment.texcoord[0]", "fragment.texcoord[1]", "fragment.texcoord[2]", "fragment.texcoord[3]", 3424 "fragment.texcoord[4]", "fragment.texcoord[5]", "fragment.texcoord[6]", "fragment.texcoord[7]" 3425 }; 3426 unsigned int i; 3427 const struct wined3d_shader_signature_element *sig = This->baseShader.input_signature; 3428 const char *semantic_name; 3429 DWORD semantic_idx; 3430 3431 switch(args->super.vp_mode) 3432 { 3433 case pretransformed: 3434 case fixedfunction: 3435 /* The pixelshader has to collect the varyings on its own. In any case properly load 3436 * color0 and color1. In the case of pretransformed vertices also load texcoords. Set 3437 * other attribs to 0.0. 3438 * 3439 * For fixedfunction this behavior is correct, according to the tests. 
For pretransformed 3440 * we'd either need a replacement shader that can load other attribs like BINORMAL, or 3441 * load the texcoord attrib pointers to match the pixel shader signature 3442 */ 3443 for(i = 0; i < MAX_REG_INPUT; i++) 3444 { 3445 semantic_name = sig[i].semantic_name; 3446 semantic_idx = sig[i].semantic_idx; 3447 if (!semantic_name) continue; 3448 3449 if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR)) 3450 { 3451 if (!semantic_idx) priv->ps_input[i] = "fragment.color.primary"; 3452 else if(semantic_idx == 1) priv->ps_input[i] = "fragment.color.secondary"; 3453 else priv->ps_input[i] = "0.0"; 3454 } 3455 else if(args->super.vp_mode == fixedfunction) 3456 { 3457 priv->ps_input[i] = "0.0"; 3458 } 3459 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD)) 3460 { 3461 if(semantic_idx < 8) priv->ps_input[i] = texcoords[semantic_idx]; 3462 else priv->ps_input[i] = "0.0"; 3463 } 3464 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG)) 3465 { 3466 if (!semantic_idx) priv->ps_input[i] = "fragment.fogcoord"; 3467 else priv->ps_input[i] = "0.0"; 3468 } 3469 else 3470 { 3471 priv->ps_input[i] = "0.0"; 3472 } 3473 3474 TRACE("v%u, semantic %s%u is %s\n", i, semantic_name, semantic_idx, priv->ps_input[i]); 3475 } 3476 break; 3477 3478 case vertexshader: 3479 /* That one is easy. 
The vertex shaders provide v0-v7 in fragment.texcoord and v8 and v9 in 3480 * fragment.color 3481 */ 3482 for(i = 0; i < 8; i++) 3483 { 3484 priv->ps_input[i] = texcoords[i]; 3485 } 3486 priv->ps_input[8] = "fragment.color.primary"; 3487 priv->ps_input[9] = "fragment.color.secondary"; 3488 break; 3489 } 3490 } 3491 3492 /* GL locking is done by the caller */ 3493 static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This, struct wined3d_shader_buffer *buffer, 3494 const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled) 3495 { 3496 const shader_reg_maps* reg_maps = &This->baseShader.reg_maps; 3497 CONST DWORD *function = This->baseShader.function; 3498 const struct wined3d_gl_info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info; 3499 const local_constant *lconst; 3500 GLuint retval; 3501 char fragcolor[16]; 3502 DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This), next_local, cur; 3503 struct shader_arb_ctx_priv priv_ctx; 3504 BOOL dcl_td = FALSE; 3505 BOOL want_nv_prog = FALSE; 3506 struct arb_pshader_private *shader_priv = This->baseShader.backend_data; 3507 GLint errPos; 3508 DWORD map; 3509 3510 char srgbtmp[4][4]; 3511 unsigned int i, found = 0; 3512 3513 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 3514 { 3515 if (!(map & 1) 3516 || (This->color0_mov && i == This->color0_reg) 3517 || (reg_maps->shader_version.major < 2 && !i)) 3518 continue; 3519 3520 sprintf(srgbtmp[found], "R%u", i); 3521 ++found; 3522 if (found == 4) break; 3523 } 3524 3525 switch(found) { 3526 case 0: 3527 sprintf(srgbtmp[0], "TA"); 3528 sprintf(srgbtmp[1], "TB"); 3529 sprintf(srgbtmp[2], "TC"); 3530 sprintf(srgbtmp[3], "TD"); 3531 dcl_td = TRUE; 3532 break; 3533 case 1: 3534 sprintf(srgbtmp[1], "TA"); 3535 sprintf(srgbtmp[2], "TB"); 3536 sprintf(srgbtmp[3], "TC"); 3537 break; 3538 case 2: 3539 sprintf(srgbtmp[2], "TA"); 3540 sprintf(srgbtmp[3], "TB"); 3541 break; 3542 case 3: 3543 
sprintf(srgbtmp[3], "TA"); 3544 break; 3545 case 4: 3546 break; 3547 } 3548 3549 /* Create the hw ARB shader */ 3550 memset(&priv_ctx, 0, sizeof(priv_ctx)); 3551 priv_ctx.cur_ps_args = args; 3552 priv_ctx.compiled_fprog = compiled; 3553 priv_ctx.cur_np2fixup_info = &compiled->np2fixup_info; 3554 init_ps_input(This, args, &priv_ctx); 3555 list_init(&priv_ctx.control_frames); 3556 3557 /* Avoid enabling NV_fragment_program* if we do not need it. 3558 * 3559 * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register, 3560 * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation 3561 * is faster than what we gain from using higher native instructions. There are some things though 3562 * that cannot be emulated. In that case enable the extensions. 3563 * If the extension is enabled, instruction handlers that support both ways will use it. 3564 * 3565 * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program. 3566 * So enable the best we can get. 
3567 */ 3568 if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0 || reg_maps->usestexldd || 3569 reg_maps->usestexldl || reg_maps->usesfacing || reg_maps->usesifc || reg_maps->usescall) 3570 { 3571 want_nv_prog = TRUE; 3572 } 3573 3574 shader_addline(buffer, "!!ARBfp1.0\n"); 3575 if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM2]) 3576 { 3577 shader_addline(buffer, "OPTION NV_fragment_program2;\n"); 3578 priv_ctx.target_version = NV3; 3579 } 3580 else if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]) 3581 { 3582 shader_addline(buffer, "OPTION NV_fragment_program;\n"); 3583 priv_ctx.target_version = NV2; 3584 } else { 3585 if(want_nv_prog) 3586 { 3587 /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some 3588 * limits properly 3589 */ 3590 ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n"); 3591 ERR("Try GLSL\n"); 3592 } 3593 priv_ctx.target_version = ARB; 3594 } 3595 3596 if(This->baseShader.reg_maps.highest_render_target > 0) 3597 { 3598 shader_addline(buffer, "OPTION ARB_draw_buffers;\n"); 3599 } 3600 3601 if (reg_maps->shader_version.major < 3) 3602 { 3603 switch(args->super.fog) { 3604 case FOG_OFF: 3605 break; 3606 case FOG_LINEAR: 3607 shader_addline(buffer, "OPTION ARB_fog_linear;\n"); 3608 break; 3609 case FOG_EXP: 3610 shader_addline(buffer, "OPTION ARB_fog_exp;\n"); 3611 break; 3612 case FOG_EXP2: 3613 shader_addline(buffer, "OPTION ARB_fog_exp2;\n"); 3614 break; 3615 } 3616 } 3617 3618 /* For now always declare the temps. At least the Nvidia assembler optimizes completely 3619 * unused temps away(but occupies them for the whole shader if they're used once). 
Always 3620 * declaring them avoids tricky bookkeeping work 3621 */ 3622 shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */ 3623 shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */ 3624 shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */ 3625 if(dcl_td) shader_addline(buffer, "TEMP TD;\n"); /* Used for sRGB writing */ 3626 shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n"); 3627 shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n"); 3628 shader_addline(buffer, "PARAM ps_helper_const = { 0.0, 1.0, %1.10f, 0.0 };\n", eps); 3629 3630 if (reg_maps->shader_version.major < 2) 3631 { 3632 strcpy(fragcolor, "R0"); 3633 } else { 3634 if(args->super.srgb_correction) { 3635 if(This->color0_mov) { 3636 sprintf(fragcolor, "R%u", This->color0_reg); 3637 } else { 3638 shader_addline(buffer, "TEMP TMP_COLOR;\n"); 3639 strcpy(fragcolor, "TMP_COLOR"); 3640 } 3641 } else { 3642 strcpy(fragcolor, "result.color"); 3643 } 3644 } 3645 3646 if(args->super.srgb_correction) { 3647 shader_addline(buffer, "PARAM srgb_consts1 = {%f, %f, %f, %f};\n", 3648 srgb_mul_low, srgb_cmp, srgb_pow, srgb_mul_high); 3649 shader_addline(buffer, "PARAM srgb_consts2 = {%f, %f, %f, %f};\n", 3650 srgb_sub_high, 0.0, 0.0, 0.0); 3651 } 3652 3653 /* Base Declarations */ 3654 next_local = shader_generate_arb_declarations((IWineD3DBaseShader *)This, 3655 reg_maps, buffer, gl_info, lconst_map, NULL, &priv_ctx); 3656 3657 for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i) 3658 { 3659 if (!(map & 1)) continue; 3660 3661 cur = compiled->numbumpenvmatconsts; 3662 compiled->bumpenvmatconst[cur].const_num = WINED3D_CONST_NUM_UNUSED; 3663 compiled->bumpenvmatconst[cur].texunit = i; 3664 compiled->luminanceconst[cur].const_num = WINED3D_CONST_NUM_UNUSED; 3665 compiled->luminanceconst[cur].texunit = i; 3666 3667 /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported 3668 * in 1.x shaders, and 
GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading 3669 * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped 3670 * textures due to conditional NP2 restrictions) 3671 * 3672 * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of 3673 * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants, 3674 * their location is shader dependent anyway and they cannot be loaded globally. 3675 */ 3676 compiled->bumpenvmatconst[cur].const_num = next_local++; 3677 shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n", 3678 i, compiled->bumpenvmatconst[cur].const_num); 3679 compiled->numbumpenvmatconsts = cur + 1; 3680 3681 if (!(reg_maps->luminanceparams & (1 << i))) continue; 3682 3683 compiled->luminanceconst[cur].const_num = next_local++; 3684 shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n", 3685 i, compiled->luminanceconst[cur].const_num); 3686 } 3687 3688 for(i = 0; i < MAX_CONST_I; i++) 3689 { 3690 compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED; 3691 if (reg_maps->integer_constants & (1 << i) && priv_ctx.target_version >= NV2) 3692 { 3693 const DWORD *control_values = find_loop_control_values((IWineD3DBaseShaderImpl *) This, i); 3694 3695 if(control_values) 3696 { 3697 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i, 3698 control_values[0], control_values[1], control_values[2]); 3699 } 3700 else 3701 { 3702 compiled->int_consts[i] = next_local; 3703 compiled->num_int_consts++; 3704 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++); 3705 } 3706 } 3707 } 3708 3709 if(reg_maps->vpos || reg_maps->usesdsy) 3710 { 3711 compiled->ycorrection = next_local; 3712 shader_addline(buffer, "PARAM ycorrection = program.local[%u];\n", next_local++); 3713 3714 if(reg_maps->vpos) 3715 { 3716 
shader_addline(buffer, "TEMP vpos;\n"); 3717 /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen). 3718 * ycorrection.y: -1.0(onscreen), 1.0(offscreen) 3719 * ycorrection.z: 1.0 3720 * ycorrection.w: 0.0 3721 */ 3722 shader_addline(buffer, "MAD vpos, fragment.position, ycorrection.zyww, ycorrection.wxww;\n"); 3723 shader_addline(buffer, "FLR vpos.xy, vpos;\n"); 3724 } 3725 } 3726 else 3727 { 3728 compiled->ycorrection = WINED3D_CONST_NUM_UNUSED; 3729 } 3730 3731 /* Load constants to fixup NP2 texcoords if there are still free constants left: 3732 * Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume 3733 * at most 8 (MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to 3734 * use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not 3735 * applied / activated. This will probably result in wrong rendering of the texture, but will save us from 3736 * shader compilation errors and the subsequent errors when drawing with this shader. */ 3737 if (priv_ctx.cur_ps_args->super.np2_fixup) { 3738 3739 struct arb_ps_np2fixup_info* const fixup = priv_ctx.cur_np2fixup_info; 3740 const WORD map = priv_ctx.cur_ps_args->super.np2_fixup; 3741 const UINT max_lconsts = gl_info->limits.arb_ps_local_constants; 3742 3743 fixup->offset = next_local; 3744 fixup->super.active = 0; 3745 3746 cur = 0; 3747 for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) { 3748 if (!(map & (1 << i))) continue; 3749 3750 if (fixup->offset + (cur >> 1) < max_lconsts) { 3751 fixup->super.active |= (1 << i); 3752 fixup->super.idx[i] = cur++; 3753 } else { 3754 FIXME("No free constant found to load NP2 fixup data into shader. 
" 3755 "Sampling from this texture will probably look wrong.\n"); 3756 break; 3757 } 3758 } 3759 3760 fixup->super.num_consts = (cur + 1) >> 1; 3761 if (fixup->super.num_consts) { 3762 shader_addline(buffer, "PARAM np2fixup[%u] = { program.env[%u..%u] };\n", 3763 fixup->super.num_consts, fixup->offset, fixup->super.num_consts + fixup->offset - 1); 3764 } 3765 3766 next_local += fixup->super.num_consts; 3767 } 3768 3769 if (shader_priv->clipplane_emulation != ~0U && args->clip) 3770 { 3771 shader_addline(buffer, "KIL fragment.texcoord[%u];\n", shader_priv->clipplane_emulation); 3772 } 3773 3774 /* Base Shader Body */ 3775 shader_generate_main((IWineD3DBaseShader *)This, buffer, reg_maps, function, &priv_ctx); 3776 3777 if(args->super.srgb_correction) { 3778 arbfp_add_sRGB_correction(buffer, fragcolor, srgbtmp[0], srgbtmp[1], srgbtmp[2], srgbtmp[3], 3779 priv_ctx.target_version >= NV2); 3780 } 3781 3782 if(strcmp(fragcolor, "result.color")) { 3783 shader_addline(buffer, "MOV result.color, %s;\n", fragcolor); 3784 } 3785 shader_addline(buffer, "END\n"); 3786 3787 /* TODO: change to resource.glObjectHandle or something like that */ 3788 GL_EXTCALL(glGenProgramsARB(1, &retval)); 3789 3790 TRACE("Creating a hw pixel shader, prg=%d\n", retval); 3791 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval)); 3792 3793 TRACE("Created hw pixel shader, prg=%d\n", retval); 3794 /* Create the program and check for errors */ 3795 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 3796 buffer->bsize, buffer->buffer)); 3797 checkGLcall("glProgramStringARB()"); 3798 3799 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos); 3800 if (errPos != -1) 3801 { 3802 FIXME("HW PixelShader Error at position %d: %s\n\n", 3803 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 3804 shader_arb_dump_program_source(buffer->buffer); 3805 retval = 0; 3806 } 3807 else 3808 { 3809 GLint native; 3810 3811 
GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 3812 checkGLcall("glGetProgramivARB()"); 3813 if (!native) WARN("Program exceeds native resource limits.\n"); 3814 } 3815 3816 /* Load immediate constants */ 3817 if(lconst_map) { 3818 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) { 3819 const float *value = (const float *)lconst->value; 3820 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, lconst_map[lconst->idx], value)); 3821 checkGLcall("glProgramLocalParameter4fvARB"); 3822 } 3823 HeapFree(GetProcessHeap(), 0, lconst_map); 3824 } 3825 3826 return retval; 3827 } 3828 3829 static int compare_sig(const struct wined3d_shader_signature_element *sig1, const struct wined3d_shader_signature_element *sig2) 3830 { 3831 unsigned int i; 3832 int ret; 3833 3834 for(i = 0; i < MAX_REG_INPUT; i++) 3835 { 3836 if (!sig1[i].semantic_name || !sig2[i].semantic_name) 3837 { 3838 /* Compare pointers, not contents. One string is NULL(element does not exist), the other one is not NULL */ 3839 if(sig1[i].semantic_name != sig2[i].semantic_name) return sig1[i].semantic_name < sig2[i].semantic_name ? -1 : 1; 3840 continue; 3841 } 3842 3843 if ((ret = strcmp(sig1[i].semantic_name, sig2[i].semantic_name))) return ret; 3844 if(sig1[i].semantic_idx != sig2[i].semantic_idx) return sig1[i].semantic_idx < sig2[i].semantic_idx ? -1 : 1; 3845 if(sig1[i].sysval_semantic != sig2[i].sysval_semantic) return sig1[i].sysval_semantic < sig2[i].sysval_semantic ? -1 : 1; 3846 if(sig1[i].component_type != sig2[i].component_type) return sig1[i].sysval_semantic < sig2[i].component_type ? -1 : 1; 3847 if(sig1[i].register_idx != sig2[i].register_idx) return sig1[i].register_idx < sig2[i].register_idx ? -1 : 1; 3848 if(sig1[i].mask != sig2->mask) return sig1[i].mask < sig2[i].mask ? 
-1 : 1; 3849 } 3850 return 0; 3851 } 3852 3853 static struct wined3d_shader_signature_element *clone_sig(const struct wined3d_shader_signature_element *sig) 3854 { 3855 struct wined3d_shader_signature_element *new; 3856 int i; 3857 char *name; 3858 3859 new = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*new) * MAX_REG_INPUT); 3860 for(i = 0; i < MAX_REG_INPUT; i++) 3861 { 3862 if (!sig[i].semantic_name) continue; 3863 3864 new[i] = sig[i]; 3865 /* Clone the semantic string */ 3866 name = HeapAlloc(GetProcessHeap(), 0, strlen(sig[i].semantic_name) + 1); 3867 strcpy(name, sig[i].semantic_name); 3868 new[i].semantic_name = name; 3869 } 3870 return new; 3871 } 3872 3873 static DWORD find_input_signature(struct shader_arb_priv *priv, const struct wined3d_shader_signature_element *sig) 3874 { 3875 struct wine_rb_entry *entry = wine_rb_get(&priv->signature_tree, sig); 3876 struct ps_signature *found_sig; 3877 3878 if (entry) 3879 { 3880 found_sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 3881 TRACE("Found existing signature %u\n", found_sig->idx); 3882 return found_sig->idx; 3883 } 3884 found_sig = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*sig)); 3885 found_sig->sig = clone_sig(sig); 3886 found_sig->idx = priv->ps_sig_number++; 3887 TRACE("New signature stored and assigned number %u\n", found_sig->idx); 3888 if(wine_rb_put(&priv->signature_tree, sig, &found_sig->entry) == -1) 3889 { 3890 ERR("Failed to insert program entry.\n"); 3891 } 3892 return found_sig->idx; 3893 } 3894 3895 static void init_output_registers(IWineD3DVertexShaderImpl *shader, DWORD sig_num, struct shader_arb_ctx_priv *priv_ctx, 3896 struct arb_vs_compiled_shader *compiled) 3897 { 3898 unsigned int i, j; 3899 static const char * const texcoords[8] = 3900 { 3901 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 3902 "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]" 3903 }; 3904 
IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) shader->baseShader.device; 3905 IWineD3DBaseShaderClass *baseshader = &shader->baseShader; 3906 const struct wined3d_shader_signature_element *sig; 3907 const char *semantic_name; 3908 DWORD semantic_idx, reg_idx; 3909 3910 /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary 3911 * and varying 9 to result.color.secondary 3912 */ 3913 static const char * const decl_idx_to_string[MAX_REG_INPUT] = 3914 { 3915 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 3916 "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]", 3917 "result.color.primary", "result.color.secondary" 3918 }; 3919 3920 if(sig_num == ~0) 3921 { 3922 TRACE("Pixel shader uses builtin varyings\n"); 3923 /* Map builtins to builtins */ 3924 for(i = 0; i < 8; i++) 3925 { 3926 priv_ctx->texcrd_output[i] = texcoords[i]; 3927 } 3928 priv_ctx->color_output[0] = "result.color.primary"; 3929 priv_ctx->color_output[1] = "result.color.secondary"; 3930 priv_ctx->fog_output = "result.fogcoord"; 3931 3932 /* Map declared regs to builtins. 
Use "TA" to /dev/null unread output */ 3933 for (i = 0; i < (sizeof(baseshader->output_signature) / sizeof(*baseshader->output_signature)); ++i) 3934 { 3935 semantic_name = baseshader->output_signature[i].semantic_name; 3936 if (!semantic_name) continue; 3937 3938 if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION)) 3939 { 3940 TRACE("o%u is TMP_OUT\n", i); 3941 if (!baseshader->output_signature[i].semantic_idx) priv_ctx->vs_output[i] = "TMP_OUT"; 3942 else priv_ctx->vs_output[i] = "TA"; 3943 } 3944 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE)) 3945 { 3946 TRACE("o%u is result.pointsize\n", i); 3947 if (!baseshader->output_signature[i].semantic_idx) priv_ctx->vs_output[i] = "result.pointsize"; 3948 else priv_ctx->vs_output[i] = "TA"; 3949 } 3950 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR)) 3951 { 3952 TRACE("o%u is result.color.?, idx %u\n", i, baseshader->output_signature[i].semantic_idx); 3953 if (!baseshader->output_signature[i].semantic_idx) 3954 priv_ctx->vs_output[i] = "result.color.primary"; 3955 else if (baseshader->output_signature[i].semantic_idx == 1) 3956 priv_ctx->vs_output[i] = "result.color.secondary"; 3957 else priv_ctx->vs_output[i] = "TA"; 3958 } 3959 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD)) 3960 { 3961 TRACE("o%u is %s\n", i, texcoords[baseshader->output_signature[i].semantic_idx]); 3962 if (baseshader->output_signature[i].semantic_idx >= 8) priv_ctx->vs_output[i] = "TA"; 3963 else priv_ctx->vs_output[i] = texcoords[baseshader->output_signature[i].semantic_idx]; 3964 } 3965 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG)) 3966 { 3967 TRACE("o%u is result.fogcoord\n", i); 3968 if (baseshader->output_signature[i].semantic_idx > 0) priv_ctx->vs_output[i] = "TA"; 3969 else priv_ctx->vs_output[i] = "result.fogcoord"; 3970 } 3971 else 3972 { 3973 priv_ctx->vs_output[i] = "TA"; 3974 } 3975 } 3976 return; 3977 } 3978 3979 /* Instead of 
searching for the signature in the signature list, read the one from the current pixel shader. 3980 * Its maybe not the shader where the signature came from, but it is the same signature and faster to find 3981 */ 3982 sig = device->stateBlock->state.pixel_shader->baseShader.input_signature; 3983 TRACE("Pixel shader uses declared varyings\n"); 3984 3985 /* Map builtin to declared. /dev/null the results by default to the TA temp reg */ 3986 for(i = 0; i < 8; i++) 3987 { 3988 priv_ctx->texcrd_output[i] = "TA"; 3989 } 3990 priv_ctx->color_output[0] = "TA"; 3991 priv_ctx->color_output[1] = "TA"; 3992 priv_ctx->fog_output = "TA"; 3993 3994 for(i = 0; i < MAX_REG_INPUT; i++) 3995 { 3996 semantic_name = sig[i].semantic_name; 3997 semantic_idx = sig[i].semantic_idx; 3998 reg_idx = sig[i].register_idx; 3999 if (!semantic_name) continue; 4000 4001 /* If a declared input register is not written by builtin arguments, don't write to it. 4002 * GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D 4003 * 4004 * Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes 4005 * to TMP_OUT in any case 4006 */ 4007 if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD)) 4008 { 4009 if(semantic_idx < 8) priv_ctx->texcrd_output[semantic_idx] = decl_idx_to_string[reg_idx]; 4010 } 4011 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR)) 4012 { 4013 if(semantic_idx < 2) priv_ctx->color_output[semantic_idx] = decl_idx_to_string[reg_idx]; 4014 } 4015 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG)) 4016 { 4017 if (!semantic_idx) priv_ctx->fog_output = decl_idx_to_string[reg_idx]; 4018 } 4019 else 4020 { 4021 continue; 4022 } 4023 4024 if (!strcmp(decl_idx_to_string[reg_idx], "result.color.primary") 4025 || !strcmp(decl_idx_to_string[reg_idx], "result.color.secondary")) 4026 { 4027 compiled->need_color_unclamp = TRUE; 4028 } 4029 } 4030 4031 /* Map declared to declared */ 4032 for 
(i = 0; i < (sizeof(baseshader->output_signature) / sizeof(*baseshader->output_signature)); ++i) 4033 { 4034 /* Write unread output to TA to throw them away */ 4035 priv_ctx->vs_output[i] = "TA"; 4036 semantic_name = baseshader->output_signature[i].semantic_name; 4037 if (!semantic_name) continue; 4038 4039 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION) 4040 && !baseshader->output_signature[i].semantic_idx) 4041 { 4042 priv_ctx->vs_output[i] = "TMP_OUT"; 4043 continue; 4044 } 4045 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE) 4046 && !baseshader->output_signature[i].semantic_idx) 4047 { 4048 priv_ctx->vs_output[i] = "result.pointsize"; 4049 continue; 4050 } 4051 4052 for(j = 0; j < MAX_REG_INPUT; j++) 4053 { 4054 if (!sig[j].semantic_name) continue; 4055 4056 if (!strcmp(sig[j].semantic_name, semantic_name) 4057 && sig[j].semantic_idx == baseshader->output_signature[i].semantic_idx) 4058 { 4059 priv_ctx->vs_output[i] = decl_idx_to_string[sig[j].register_idx]; 4060 4061 if (!strcmp(priv_ctx->vs_output[i], "result.color.primary") 4062 || !strcmp(priv_ctx->vs_output[i], "result.color.secondary")) 4063 { 4064 compiled->need_color_unclamp = TRUE; 4065 } 4066 } 4067 } 4068 } 4069 } 4070 4071 /* GL locking is done by the caller */ 4072 static GLuint shader_arb_generate_vshader(IWineD3DVertexShaderImpl *This, struct wined3d_shader_buffer *buffer, 4073 const struct arb_vs_compile_args *args, struct arb_vs_compiled_shader *compiled) 4074 { 4075 const shader_reg_maps *reg_maps = &This->baseShader.reg_maps; 4076 CONST DWORD *function = This->baseShader.function; 4077 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device; 4078 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 4079 const local_constant *lconst; 4080 GLuint ret; 4081 DWORD next_local, *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This); 4082 struct shader_arb_ctx_priv priv_ctx; 4083 unsigned int i; 4084 GLint errPos; 
4085 4086 memset(&priv_ctx, 0, sizeof(priv_ctx)); 4087 priv_ctx.cur_vs_args = args; 4088 list_init(&priv_ctx.control_frames); 4089 init_output_registers(This, args->ps_signature, &priv_ctx, compiled); 4090 4091 /* Create the hw ARB shader */ 4092 shader_addline(buffer, "!!ARBvp1.0\n"); 4093 4094 /* Always enable the NV extension if available. Unlike fragment shaders, there is no 4095 * mesurable performance penalty, and we can always make use of it for clipplanes. 4096 */ 4097 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 4098 { 4099 shader_addline(buffer, "OPTION NV_vertex_program3;\n"); 4100 priv_ctx.target_version = NV3; 4101 shader_addline(buffer, "ADDRESS aL;\n"); 4102 } 4103 else if (gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 4104 { 4105 shader_addline(buffer, "OPTION NV_vertex_program2;\n"); 4106 priv_ctx.target_version = NV2; 4107 shader_addline(buffer, "ADDRESS aL;\n"); 4108 } else { 4109 priv_ctx.target_version = ARB; 4110 } 4111 4112 shader_addline(buffer, "TEMP TMP_OUT;\n"); 4113 if(need_helper_const((IWineD3DBaseShaderImpl *) This, gl_info)) { 4114 shader_addline(buffer, "PARAM helper_const = { 0.0, 1.0, 2.0, %1.10f};\n", eps); 4115 } 4116 if(need_rel_addr_const((IWineD3DBaseShaderImpl *) This, gl_info)) { 4117 shader_addline(buffer, "PARAM rel_addr_const = { 0.5, %d.0, 0.0, 0.0 };\n", This->rel_offset); 4118 shader_addline(buffer, "TEMP A0_SHADOW;\n"); 4119 } 4120 4121 shader_addline(buffer, "TEMP TA;\n"); 4122 4123 /* Base Declarations */ 4124 next_local = shader_generate_arb_declarations((IWineD3DBaseShader *)This, 4125 reg_maps, buffer, gl_info, lconst_map, &priv_ctx.vs_clipplanes, &priv_ctx); 4126 4127 for(i = 0; i < MAX_CONST_I; i++) 4128 { 4129 compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED; 4130 if(reg_maps->integer_constants & (1 << i) && priv_ctx.target_version >= NV2) 4131 { 4132 const DWORD *control_values = find_loop_control_values((IWineD3DBaseShaderImpl *) This, i); 4133 4134 if(control_values) 4135 { 4136 shader_addline(buffer, 
"PARAM I%u = {%u, %u, %u, -1};\n", i, 4137 control_values[0], control_values[1], control_values[2]); 4138 } 4139 else 4140 { 4141 compiled->int_consts[i] = next_local; 4142 compiled->num_int_consts++; 4143 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++); 4144 } 4145 } 4146 } 4147 4148 /* We need a constant to fixup the final position */ 4149 shader_addline(buffer, "PARAM posFixup = program.local[%u];\n", next_local); 4150 compiled->pos_fixup = next_local++; 4151 4152 /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values 4153 * for output parameters. D3D in theory does not do that either, but some applications depend on a 4154 * proper initialization of the secondary color, and programs using the fixed function pipeline without 4155 * a replacement shader depend on the texcoord.w being set properly. 4156 * 4157 * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This 4158 * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So 4159 * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex- 4160 * coords, we have a flag in the opengl caps. 
Many cards do not require the texcoord being set, and 4161 * this can eat a number of instructions, so skip it unless this cap is set as well 4162 */ 4163 if (!gl_info->supported[NV_VERTEX_PROGRAM]) 4164 { 4165 const char *color_init = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_0001); 4166 shader_addline(buffer, "MOV result.color.secondary, %s;\n", color_init); 4167 4168 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W && !device->frag_pipe->ffp_proj_control) 4169 { 4170 int i; 4171 const char *one = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ONE); 4172 for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) 4173 { 4174 if (This->baseShader.reg_maps.texcoord_mask[i] 4175 && This->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) 4176 shader_addline(buffer, "MOV result.texcoord[%u].w, %s\n", i, one); 4177 } 4178 } 4179 } 4180 4181 /* The shader starts with the main function */ 4182 priv_ctx.in_main_func = TRUE; 4183 /* Base Shader Body */ 4184 shader_generate_main((IWineD3DBaseShader *)This, buffer, reg_maps, function, &priv_ctx); 4185 4186 if(!priv_ctx.footer_written) vshader_add_footer(This, buffer, args, &priv_ctx); 4187 4188 shader_addline(buffer, "END\n"); 4189 4190 /* TODO: change to resource.glObjectHandle or something like that */ 4191 GL_EXTCALL(glGenProgramsARB(1, &ret)); 4192 4193 TRACE("Creating a hw vertex shader, prg=%d\n", ret); 4194 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret)); 4195 4196 TRACE("Created hw vertex shader, prg=%d\n", ret); 4197 /* Create the program and check for errors */ 4198 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 4199 buffer->bsize, buffer->buffer)); 4200 checkGLcall("glProgramStringARB()"); 4201 4202 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos); 4203 if (errPos != -1) 4204 { 4205 FIXME("HW VertexShader Error at position %d: %s\n\n", 4206 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 4207 
shader_arb_dump_program_source(buffer->buffer); 4208 ret = -1; 4209 } 4210 else 4211 { 4212 GLint native; 4213 4214 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 4215 checkGLcall("glGetProgramivARB()"); 4216 if (!native) WARN("Program exceeds native resource limits.\n"); 4217 4218 /* Load immediate constants */ 4219 if(lconst_map) { 4220 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) { 4221 const float *value = (const float *)lconst->value; 4222 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, lconst_map[lconst->idx], value)); 4223 } 4224 } 4225 } 4226 HeapFree(GetProcessHeap(), 0, lconst_map); 4227 4228 return ret; 4229 } 4230 4231 /* GL locking is done by the caller */ 4232 static struct arb_ps_compiled_shader *find_arb_pshader(IWineD3DPixelShaderImpl *shader, const struct arb_ps_compile_args *args) 4233 { 4234 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)shader->baseShader.device; 4235 UINT i; 4236 DWORD new_size; 4237 struct arb_ps_compiled_shader *new_array; 4238 struct wined3d_shader_buffer buffer; 4239 struct arb_pshader_private *shader_data; 4240 GLuint ret; 4241 4242 if (!shader->baseShader.backend_data) 4243 { 4244 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 4245 struct shader_arb_priv *priv = device->shader_priv; 4246 4247 shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data)); 4248 shader_data = shader->baseShader.backend_data; 4249 shader_data->clamp_consts = shader->baseShader.reg_maps.shader_version.major == 1; 4250 4251 if(shader->baseShader.reg_maps.shader_version.major < 3) shader_data->input_signature_idx = ~0; 4252 else shader_data->input_signature_idx = find_input_signature(priv, shader->baseShader.input_signature); 4253 4254 shader_data->has_signature_idx = TRUE; 4255 TRACE("Shader got assigned input signature index %u\n", shader_data->input_signature_idx); 4256 4257 
if (!device->vs_clipping) 4258 shader_data->clipplane_emulation = shader_find_free_input_register(&shader->baseShader.reg_maps, 4259 gl_info->limits.texture_stages - 1); 4260 else 4261 shader_data->clipplane_emulation = ~0U; 4262 } 4263 shader_data = shader->baseShader.backend_data; 4264 4265 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 4266 * so a linear search is more performant than a hashmap or a binary search 4267 * (cache coherency etc) 4268 */ 4269 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4270 { 4271 if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args))) 4272 return &shader_data->gl_shaders[i]; 4273 } 4274 4275 TRACE("No matching GL shader found, compiling a new shader\n"); 4276 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 4277 if (shader_data->num_gl_shaders) 4278 { 4279 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 4280 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 4281 new_size * sizeof(*shader_data->gl_shaders)); 4282 } else { 4283 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders)); 4284 new_size = 1; 4285 } 4286 4287 if(!new_array) { 4288 ERR("Out of memory\n"); 4289 return 0; 4290 } 4291 shader_data->gl_shaders = new_array; 4292 shader_data->shader_array_size = new_size; 4293 } 4294 4295 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 4296 4297 pixelshader_update_samplers(&shader->baseShader.reg_maps, 4298 (IWineD3DBaseTexture **)device->stateBlock->state.textures); 4299 4300 if (!shader_buffer_init(&buffer)) 4301 { 4302 ERR("Failed to initialize shader buffer.\n"); 4303 return 0; 4304 } 4305 4306 ret = shader_arb_generate_pshader(shader, &buffer, args, 4307 &shader_data->gl_shaders[shader_data->num_gl_shaders]); 4308 shader_buffer_free(&buffer); 4309 shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 4310 4311 return 
&shader_data->gl_shaders[shader_data->num_gl_shaders++]; 4312 } 4313 4314 static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new, 4315 const DWORD use_map, BOOL skip_int) { 4316 if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE; 4317 if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE; 4318 if(stored->super.fog_src != new->super.fog_src) return FALSE; 4319 if(stored->clip.boolclip_compare != new->clip.boolclip_compare) return FALSE; 4320 if(stored->ps_signature != new->ps_signature) return FALSE; 4321 if(stored->vertex.samplers_compare != new->vertex.samplers_compare) return FALSE; 4322 if(skip_int) return TRUE; 4323 4324 return !memcmp(stored->loop_ctrl, new->loop_ctrl, sizeof(stored->loop_ctrl)); 4325 } 4326 4327 static struct arb_vs_compiled_shader *find_arb_vshader(IWineD3DVertexShaderImpl *shader, const struct arb_vs_compile_args *args) 4328 { 4329 UINT i; 4330 DWORD new_size; 4331 struct arb_vs_compiled_shader *new_array; 4332 DWORD use_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.use_map; 4333 struct wined3d_shader_buffer buffer; 4334 struct arb_vshader_private *shader_data; 4335 GLuint ret; 4336 const struct wined3d_gl_info *gl_info = &((IWineD3DDeviceImpl *)shader->baseShader.device)->adapter->gl_info; 4337 4338 if (!shader->baseShader.backend_data) 4339 { 4340 shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data)); 4341 } 4342 shader_data = shader->baseShader.backend_data; 4343 4344 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 4345 * so a linear search is more performant than a hashmap or a binary search 4346 * (cache coherency etc) 4347 */ 4348 for(i = 0; i < shader_data->num_gl_shaders; i++) { 4349 if (vs_args_equal(&shader_data->gl_shaders[i].args, args, 4350 use_map, gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])) 4351 { 4352 return 
&shader_data->gl_shaders[i]; 4353 } 4354 } 4355 4356 TRACE("No matching GL shader found, compiling a new shader\n"); 4357 4358 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 4359 if (shader_data->num_gl_shaders) 4360 { 4361 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 4362 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 4363 new_size * sizeof(*shader_data->gl_shaders)); 4364 } else { 4365 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders)); 4366 new_size = 1; 4367 } 4368 4369 if(!new_array) { 4370 ERR("Out of memory\n"); 4371 return 0; 4372 } 4373 shader_data->gl_shaders = new_array; 4374 shader_data->shader_array_size = new_size; 4375 } 4376 4377 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 4378 4379 if (!shader_buffer_init(&buffer)) 4380 { 4381 ERR("Failed to initialize shader buffer.\n"); 4382 return 0; 4383 } 4384 4385 ret = shader_arb_generate_vshader(shader, &buffer, args, 4386 &shader_data->gl_shaders[shader_data->num_gl_shaders]); 4387 shader_buffer_free(&buffer); 4388 shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 4389 4390 return &shader_data->gl_shaders[shader_data->num_gl_shaders++]; 4391 } 4392 4393 static void find_arb_ps_compile_args(const struct wined3d_state *state, 4394 IWineD3DPixelShaderImpl *shader, struct arb_ps_compile_args *args) 4395 { 4396 int i; 4397 WORD int_skip; 4398 const struct wined3d_gl_info *gl_info = &((IWineD3DDeviceImpl *)shader->baseShader.device)->adapter->gl_info; 4399 4400 find_ps_compile_args(state, shader, &args->super); 4401 4402 /* This forces all local boolean constants to 1 to make them stateblock independent */ 4403 args->bools = shader->baseShader.reg_maps.local_bool_consts; 4404 4405 for(i = 0; i < MAX_CONST_B; i++) 4406 { 4407 if (state->ps_consts_b[i]) 4408 args->bools |= ( 1 << i); 4409 } 4410 4411 /* Only enable the clip plane 
emulation KIL if at least one clipplane is enabled. The KIL instruction 4412 * is quite expensive because it forces the driver to disable early Z discards. It is cheaper to 4413 * duplicate the shader than have a no-op KIL instruction in every shader 4414 */ 4415 if ((!((IWineD3DDeviceImpl *)shader->baseShader.device)->vs_clipping) && use_vs(state) 4416 && state->render_states[WINED3DRS_CLIPPING] 4417 && state->render_states[WINED3DRS_CLIPPLANEENABLE]) 4418 args->clip = 1; 4419 else 4420 args->clip = 0; 4421 4422 /* Skip if unused or local, or supported natively */ 4423 int_skip = ~shader->baseShader.reg_maps.integer_constants | shader->baseShader.reg_maps.local_int_consts; 4424 if (int_skip == 0xffff || gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]) 4425 { 4426 memset(&args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 4427 return; 4428 } 4429 4430 for(i = 0; i < MAX_CONST_I; i++) 4431 { 4432 if(int_skip & (1 << i)) 4433 { 4434 args->loop_ctrl[i][0] = 0; 4435 args->loop_ctrl[i][1] = 0; 4436 args->loop_ctrl[i][2] = 0; 4437 } 4438 else 4439 { 4440 args->loop_ctrl[i][0] = state->ps_consts_i[i * 4]; 4441 args->loop_ctrl[i][1] = state->ps_consts_i[i * 4 + 1]; 4442 args->loop_ctrl[i][2] = state->ps_consts_i[i * 4 + 2]; 4443 } 4444 } 4445 } 4446 4447 static void find_arb_vs_compile_args(const struct wined3d_state *state, 4448 IWineD3DVertexShaderImpl *shader, struct arb_vs_compile_args *args) 4449 { 4450 int i; 4451 WORD int_skip; 4452 IWineD3DDeviceImpl *dev = (IWineD3DDeviceImpl *)shader->baseShader.device; 4453 const struct wined3d_gl_info *gl_info = &dev->adapter->gl_info; 4454 4455 find_vs_compile_args(state, shader, &args->super); 4456 4457 args->clip.boolclip_compare = 0; 4458 if (use_ps(state)) 4459 { 4460 IWineD3DPixelShaderImpl *ps = state->pixel_shader; 4461 struct arb_pshader_private *shader_priv = ps->baseShader.backend_data; 4462 args->ps_signature = shader_priv->input_signature_idx; 4463 4464 args->clip.boolclip.clip_texcoord = shader_priv->clipplane_emulation 
+ 1; 4465 } 4466 else 4467 { 4468 args->ps_signature = ~0; 4469 if(!dev->vs_clipping) 4470 { 4471 args->clip.boolclip.clip_texcoord = ffp_clip_emul(state) ? gl_info->limits.texture_stages : 0; 4472 } 4473 /* Otherwise: Setting boolclip_compare set clip_texcoord to 0 */ 4474 } 4475 4476 if (args->clip.boolclip.clip_texcoord) 4477 { 4478 if (state->render_states[WINED3DRS_CLIPPING]) 4479 args->clip.boolclip.clipplane_mask = (unsigned char)state->render_states[WINED3DRS_CLIPPLANEENABLE]; 4480 /* clipplane_mask was set to 0 by setting boolclip_compare to 0 */ 4481 } 4482 4483 /* This forces all local boolean constants to 1 to make them stateblock independent */ 4484 args->clip.boolclip.bools = shader->baseShader.reg_maps.local_bool_consts; 4485 /* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */ 4486 for(i = 0; i < MAX_CONST_B; i++) 4487 { 4488 if (state->vs_consts_b[i]) 4489 args->clip.boolclip.bools |= ( 1 << i); 4490 } 4491 4492 args->vertex.samplers[0] = dev->texUnitMap[MAX_FRAGMENT_SAMPLERS + 0]; 4493 args->vertex.samplers[1] = dev->texUnitMap[MAX_FRAGMENT_SAMPLERS + 1]; 4494 args->vertex.samplers[2] = dev->texUnitMap[MAX_FRAGMENT_SAMPLERS + 2]; 4495 args->vertex.samplers[3] = 0; 4496 4497 /* Skip if unused or local */ 4498 int_skip = ~shader->baseShader.reg_maps.integer_constants | shader->baseShader.reg_maps.local_int_consts; 4499 /* This is about flow control, not clipping. 
*/ 4500 if (int_skip == 0xffff || gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 4501 { 4502 memset(&args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 4503 return; 4504 } 4505 4506 for(i = 0; i < MAX_CONST_I; i++) 4507 { 4508 if(int_skip & (1 << i)) 4509 { 4510 args->loop_ctrl[i][0] = 0; 4511 args->loop_ctrl[i][1] = 0; 4512 args->loop_ctrl[i][2] = 0; 4513 } 4514 else 4515 { 4516 args->loop_ctrl[i][0] = state->vs_consts_i[i * 4]; 4517 args->loop_ctrl[i][1] = state->vs_consts_i[i * 4 + 1]; 4518 args->loop_ctrl[i][2] = state->vs_consts_i[i * 4 + 2]; 4519 } 4520 } 4521 } 4522 4523 /* GL locking is done by the caller */ 4524 static void shader_arb_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS) 4525 { 4526 IWineD3DDeviceImpl *This = context->swapchain->device; 4527 struct shader_arb_priv *priv = This->shader_priv; 4528 const struct wined3d_gl_info *gl_info = context->gl_info; 4529 const struct wined3d_state *state = &This->stateBlock->state; 4530 int i; 4531 4532 /* Deal with pixel shaders first so the vertex shader arg function has the input signature ready */ 4533 if (usePS) 4534 { 4535 IWineD3DPixelShaderImpl *ps = state->pixel_shader; 4536 struct arb_ps_compile_args compile_args; 4537 struct arb_ps_compiled_shader *compiled; 4538 4539 TRACE("Using pixel shader %p.\n", ps); 4540 find_arb_ps_compile_args(state, ps, &compile_args); 4541 compiled = find_arb_pshader(ps, &compile_args); 4542 priv->current_fprogram_id = compiled->prgId; 4543 priv->compiled_fprog = compiled; 4544 4545 /* Bind the fragment program */ 4546 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id)); 4547 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);"); 4548 4549 if(!priv->use_arbfp_fixed_func) { 4550 /* Enable OpenGL fragment programs */ 4551 glEnable(GL_FRAGMENT_PROGRAM_ARB); 4552 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);"); 4553 } 4554 TRACE("(%p) : Bound fragment program %u and enabled 
GL_FRAGMENT_PROGRAM_ARB\n", This, priv->current_fprogram_id); 4555 4556 /* Pixel Shader 1.x constants are clamped to [-1;1], Pixel Shader 2.0 constants are not. If switching between 4557 * a 1.x and newer shader, reload the first 8 constants 4558 */ 4559 if(priv->last_ps_const_clamped != ((struct arb_pshader_private *)ps->baseShader.backend_data)->clamp_consts) 4560 { 4561 priv->last_ps_const_clamped = ((struct arb_pshader_private *)ps->baseShader.backend_data)->clamp_consts; 4562 This->highest_dirty_ps_const = max(This->highest_dirty_ps_const, 8); 4563 for(i = 0; i < 8; i++) 4564 { 4565 context->pshader_const_dirty[i] = 1; 4566 } 4567 /* Also takes care of loading local constants */ 4568 shader_arb_load_constants(context, TRUE, FALSE); 4569 } 4570 else 4571 { 4572 shader_arb_ps_local_constants(This); 4573 } 4574 4575 /* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */ 4576 if (compiled->np2fixup_info.super.active) 4577 shader_arb_load_np2fixup_constants((IWineD3DDevice *)This, usePS, useVS); 4578 } 4579 else if (gl_info->supported[ARB_FRAGMENT_PROGRAM] && !priv->use_arbfp_fixed_func) 4580 { 4581 /* Disable only if we're not using arbfp fixed function fragment processing. 
If this is used, 4582 * keep GL_FRAGMENT_PROGRAM_ARB enabled, and the fixed function pipeline will bind the fixed function 4583 * replacement shader 4584 */ 4585 glDisable(GL_FRAGMENT_PROGRAM_ARB); 4586 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 4587 priv->current_fprogram_id = 0; 4588 } 4589 4590 if (useVS) 4591 { 4592 IWineD3DVertexShaderImpl *vs = state->vertex_shader; 4593 struct arb_vs_compile_args compile_args; 4594 struct arb_vs_compiled_shader *compiled; 4595 4596 TRACE("Using vertex shader %p\n", vs); 4597 find_arb_vs_compile_args(state, vs, &compile_args); 4598 compiled = find_arb_vshader(vs, &compile_args); 4599 priv->current_vprogram_id = compiled->prgId; 4600 priv->compiled_vprog = compiled; 4601 4602 /* Bind the vertex program */ 4603 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id)); 4604 checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);"); 4605 4606 /* Enable OpenGL vertex programs */ 4607 glEnable(GL_VERTEX_PROGRAM_ARB); 4608 checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);"); 4609 TRACE("(%p) : Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", This, priv->current_vprogram_id); 4610 shader_arb_vs_local_constants(This); 4611 4612 if(priv->last_vs_color_unclamp != compiled->need_color_unclamp) { 4613 priv->last_vs_color_unclamp = compiled->need_color_unclamp; 4614 4615 if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT]) 4616 { 4617 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, !compiled->need_color_unclamp)); 4618 checkGLcall("glClampColorARB"); 4619 } else { 4620 FIXME("vertex color clamp needs to be changed, but extension not supported.\n"); 4621 } 4622 } 4623 } 4624 else if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4625 { 4626 priv->current_vprogram_id = 0; 4627 glDisable(GL_VERTEX_PROGRAM_ARB); 4628 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 4629 } 4630 } 4631 4632 /* GL locking is done by the caller */ 4633 static void shader_arb_select_depth_blt(IWineD3DDevice 
*iface, enum tex_types tex_type, const SIZE *ds_mask_size) 4634 { 4635 const float mask[] = {0.0f, 0.0f, (float)ds_mask_size->cx, (float)ds_mask_size->cy}; 4636 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface; 4637 BOOL masked = ds_mask_size->cx && ds_mask_size->cy; 4638 struct shader_arb_priv *priv = This->shader_priv; 4639 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info; 4640 GLuint *blt_fprogram; 4641 4642 if (!priv->depth_blt_vprogram_id) priv->depth_blt_vprogram_id = create_arb_blt_vertex_program(gl_info); 4643 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->depth_blt_vprogram_id)); 4644 glEnable(GL_VERTEX_PROGRAM_ARB); 4645 4646 blt_fprogram = masked ? &priv->depth_blt_fprogram_id_masked[tex_type] : &priv->depth_blt_fprogram_id_full[tex_type]; 4647 if (!*blt_fprogram) *blt_fprogram = create_arb_blt_fragment_program(gl_info, tex_type, masked); 4648 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, *blt_fprogram)); 4649 if (masked) GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, mask)); 4650 glEnable(GL_FRAGMENT_PROGRAM_ARB); 4651 } 4652 4653 /* GL locking is done by the caller */ 4654 static void shader_arb_deselect_depth_blt(IWineD3DDevice *iface) { 4655 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface; 4656 struct shader_arb_priv *priv = This->shader_priv; 4657 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info; 4658 4659 if (priv->current_vprogram_id) { 4660 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id)); 4661 checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vertexShader->prgId);"); 4662 4663 TRACE("(%p) : Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", This, priv->current_vprogram_id); 4664 } else { 4665 glDisable(GL_VERTEX_PROGRAM_ARB); 4666 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 4667 } 4668 4669 if (priv->current_fprogram_id) { 4670 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id)); 4671 
checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pixelShader->prgId);"); 4672 4673 TRACE("(%p) : Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", This, priv->current_fprogram_id); 4674 } else if(!priv->use_arbfp_fixed_func) { 4675 glDisable(GL_FRAGMENT_PROGRAM_ARB); 4676 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 4677 } 4678 } 4679 4680 static void shader_arb_destroy(IWineD3DBaseShader *iface) { 4681 IWineD3DBaseShaderImpl *baseShader = (IWineD3DBaseShaderImpl *) iface; 4682 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)baseShader->baseShader.device; 4683 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 4684 4685 if (shader_is_pshader_version(baseShader->baseShader.reg_maps.shader_version.type)) 4686 { 4687 struct arb_pshader_private *shader_data = baseShader->baseShader.backend_data; 4688 UINT i; 4689 4690 if(!shader_data) return; /* This can happen if a shader was never compiled */ 4691 4692 if (shader_data->num_gl_shaders) 4693 { 4694 struct wined3d_context *context = context_acquire(device, NULL); 4695 4696 ENTER_GL(); 4697 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4698 { 4699 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 4700 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 4701 } 4702 LEAVE_GL(); 4703 4704 context_release(context); 4705 } 4706 4707 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders); 4708 HeapFree(GetProcessHeap(), 0, shader_data); 4709 baseShader->baseShader.backend_data = NULL; 4710 } 4711 else 4712 { 4713 struct arb_vshader_private *shader_data = baseShader->baseShader.backend_data; 4714 UINT i; 4715 4716 if(!shader_data) return; /* This can happen if a shader was never compiled */ 4717 4718 if (shader_data->num_gl_shaders) 4719 { 4720 struct wined3d_context *context = context_acquire(device, NULL); 4721 4722 ENTER_GL(); 4723 for (i = 0; i < shader_data->num_gl_shaders; ++i) 4724 { 4725 
GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 4726 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 4727 } 4728 LEAVE_GL(); 4729 4730 context_release(context); 4731 } 4732 4733 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders); 4734 HeapFree(GetProcessHeap(), 0, shader_data); 4735 baseShader->baseShader.backend_data = NULL; 4736 } 4737 } 4738 4739 static int sig_tree_compare(const void *key, const struct wine_rb_entry *entry) 4740 { 4741 struct ps_signature *e = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 4742 return compare_sig(key, e->sig); 4743 } 4744 4745 static const struct wine_rb_functions sig_tree_functions = 4746 { 4747 wined3d_rb_alloc, 4748 wined3d_rb_realloc, 4749 wined3d_rb_free, 4750 sig_tree_compare 4751 }; 4752 4753 static HRESULT shader_arb_alloc(IWineD3DDevice *iface) { 4754 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface; 4755 struct shader_arb_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*priv)); 4756 if(wine_rb_init(&priv->signature_tree, &sig_tree_functions) == -1) 4757 { 4758 ERR("RB tree init failed\n"); 4759 HeapFree(GetProcessHeap(), 0, priv); 4760 return E_OUTOFMEMORY; 4761 } 4762 This->shader_priv = priv; 4763 return WINED3D_OK; 4764 } 4765 4766 static void release_signature(struct wine_rb_entry *entry, void *context) 4767 { 4768 struct ps_signature *sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 4769 int i; 4770 for(i = 0; i < MAX_REG_INPUT; i++) 4771 { 4772 HeapFree(GetProcessHeap(), 0, (char *) sig->sig[i].semantic_name); 4773 } 4774 HeapFree(GetProcessHeap(), 0, sig->sig); 4775 HeapFree(GetProcessHeap(), 0, sig); 4776 } 4777 4778 /* Context activation is done by the caller. 
*/ 4779 static void shader_arb_free(IWineD3DDevice *iface) { 4780 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface; 4781 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info; 4782 struct shader_arb_priv *priv = This->shader_priv; 4783 int i; 4784 4785 ENTER_GL(); 4786 if(priv->depth_blt_vprogram_id) { 4787 GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_vprogram_id)); 4788 } 4789 for (i = 0; i < tex_type_count; ++i) 4790 { 4791 if (priv->depth_blt_fprogram_id_full[i]) 4792 { 4793 GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_fprogram_id_full[i])); 4794 } 4795 if (priv->depth_blt_fprogram_id_masked[i]) 4796 { 4797 GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_fprogram_id_masked[i])); 4798 } 4799 } 4800 LEAVE_GL(); 4801 4802 wine_rb_destroy(&priv->signature_tree, release_signature, NULL); 4803 HeapFree(GetProcessHeap(), 0, This->shader_priv); 4804 } 4805 4806 static BOOL shader_arb_dirty_const(IWineD3DDevice *iface) { 4807 return TRUE; 4808 } 4809 4810 static void shader_arb_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *pCaps) 4811 { 4812 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 4813 { 4814 DWORD vs_consts; 4815 4816 /* 96 is the minimum allowed value of MAX_PROGRAM_ENV_PARAMETERS_ARB 4817 * for vertex programs. If the native limit is less than that it's 4818 * not very useful, and e.g. Mesa swrast returns 0, probably to 4819 * indicate it's a software implementation. 
*/ 4820 if (gl_info->limits.arb_vs_native_constants < 96) 4821 vs_consts = gl_info->limits.arb_vs_float_constants; 4822 else 4823 vs_consts = min(gl_info->limits.arb_vs_float_constants, gl_info->limits.arb_vs_native_constants); 4824 4825 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 4826 { 4827 pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0); 4828 TRACE_(d3d_caps)("Hardware vertex shader version 3.0 enabled (NV_VERTEX_PROGRAM3)\n"); 4829 } 4830 else if (vs_consts >= 256) 4831 { 4832 /* Shader Model 2.0 requires at least 256 vertex shader constants */ 4833 pCaps->VertexShaderVersion = WINED3DVS_VERSION(2,0); 4834 TRACE_(d3d_caps)("Hardware vertex shader version 2.0 enabled (ARB_PROGRAM)\n"); 4835 } 4836 else 4837 { 4838 pCaps->VertexShaderVersion = WINED3DVS_VERSION(1,1); 4839 TRACE_(d3d_caps)("Hardware vertex shader version 1.1 enabled (ARB_PROGRAM)\n"); 4840 } 4841 pCaps->MaxVertexShaderConst = vs_consts; 4842 } 4843 else 4844 { 4845 pCaps->VertexShaderVersion = 0; 4846 pCaps->MaxVertexShaderConst = 0; 4847 } 4848 4849 if (gl_info->supported[ARB_FRAGMENT_PROGRAM]) 4850 { 4851 DWORD ps_consts; 4852 4853 /* Similar as above for vertex programs, but the minimum for fragment 4854 * programs is 24. 
*/ 4855 if (gl_info->limits.arb_ps_native_constants < 24) 4856 ps_consts = gl_info->limits.arb_ps_float_constants; 4857 else 4858 ps_consts = min(gl_info->limits.arb_ps_float_constants, gl_info->limits.arb_ps_native_constants); 4859 4860 if (gl_info->supported[NV_FRAGMENT_PROGRAM2]) 4861 { 4862 pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0); 4863 TRACE_(d3d_caps)("Hardware pixel shader version 3.0 enabled (NV_FRAGMENT_PROGRAM2)\n"); 4864 } 4865 else if (ps_consts >= 32) 4866 { 4867 /* Shader Model 2.0 requires at least 32 pixel shader constants */ 4868 pCaps->PixelShaderVersion = WINED3DPS_VERSION(2,0); 4869 TRACE_(d3d_caps)("Hardware pixel shader version 2.0 enabled (ARB_PROGRAM)\n"); 4870 } 4871 else 4872 { 4873 pCaps->PixelShaderVersion = WINED3DPS_VERSION(1,4); 4874 TRACE_(d3d_caps)("Hardware pixel shader version 1.4 enabled (ARB_PROGRAM)\n"); 4875 } 4876 pCaps->PixelShader1xMaxValue = 8.0f; 4877 pCaps->MaxPixelShaderConst = ps_consts; 4878 } 4879 else 4880 { 4881 pCaps->PixelShaderVersion = 0; 4882 pCaps->PixelShader1xMaxValue = 0.0f; 4883 pCaps->MaxPixelShaderConst = 0; 4884 } 4885 4886 pCaps->VSClipping = use_nv_clip(gl_info); 4887 } 4888 4889 static BOOL shader_arb_color_fixup_supported(struct color_fixup_desc fixup) 4890 { 4891 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d)) 4892 { 4893 TRACE("Checking support for color_fixup:\n"); 4894 dump_color_fixup_desc(fixup); 4895 } 4896 4897 /* We support everything except complex conversions. 
*/ 4898 if (!is_complex_fixup(fixup)) 4899 { 4900 TRACE("[OK]\n"); 4901 return TRUE; 4902 } 4903 4904 TRACE("[FAILED]\n"); 4905 return FALSE; 4906 } 4907 4908 static void shader_arb_add_instruction_modifiers(const struct wined3d_shader_instruction *ins) { 4909 DWORD shift; 4910 char write_mask[20], regstr[50]; 4911 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 4912 BOOL is_color = FALSE; 4913 const struct wined3d_shader_dst_param *dst; 4914 4915 if (!ins->dst_count) return; 4916 4917 dst = &ins->dst[0]; 4918 shift = dst->shift; 4919 if (!shift) return; /* Saturate alone is handled by the instructions */ 4920 4921 shader_arb_get_write_mask(ins, dst, write_mask); 4922 shader_arb_get_register_name(ins, &dst->reg, regstr, &is_color); 4923 4924 /* Generate a line that does the output modifier computation 4925 * FIXME: _SAT vs shift? _SAT alone is already handled in the instructions, if this 4926 * maps problems in e.g. _d4_sat modify shader_arb_get_modifier 4927 */ 4928 shader_addline(buffer, "MUL%s %s%s, %s, %s;\n", shader_arb_get_modifier(ins), 4929 regstr, write_mask, regstr, shift_tab[shift]); 4930 } 4931 4932 static const SHADER_HANDLER shader_arb_instruction_handler_table[WINED3DSIH_TABLE_SIZE] = 4933 { 4934 /* WINED3DSIH_ABS */ shader_hw_map2gl, 4935 /* WINED3DSIH_ADD */ shader_hw_map2gl, 4936 /* WINED3DSIH_AND */ NULL, 4937 /* WINED3DSIH_BEM */ pshader_hw_bem, 4938 /* WINED3DSIH_BREAK */ shader_hw_break, 4939 /* WINED3DSIH_BREAKC */ shader_hw_breakc, 4940 /* WINED3DSIH_BREAKP */ NULL, 4941 /* WINED3DSIH_CALL */ shader_hw_call, 4942 /* WINED3DSIH_CALLNZ */ NULL, 4943 /* WINED3DSIH_CMP */ pshader_hw_cmp, 4944 /* WINED3DSIH_CND */ pshader_hw_cnd, 4945 /* WINED3DSIH_CRS */ shader_hw_map2gl, 4946 /* WINED3DSIH_CUT */ NULL, 4947 /* WINED3DSIH_DCL */ NULL, 4948 /* WINED3DSIH_DEF */ NULL, 4949 /* WINED3DSIH_DEFB */ NULL, 4950 /* WINED3DSIH_DEFI */ NULL, 4951 /* WINED3DSIH_DP2ADD */ pshader_hw_dp2add, 4952 /* WINED3DSIH_DP3 */ shader_hw_map2gl, 4953 /* 
WINED3DSIH_DP4 */ shader_hw_map2gl, 4954 /* WINED3DSIH_DST */ shader_hw_map2gl, 4955 /* WINED3DSIH_DSX */ shader_hw_map2gl, 4956 /* WINED3DSIH_DSY */ shader_hw_dsy, 4957 /* WINED3DSIH_ELSE */ shader_hw_else, 4958 /* WINED3DSIH_EMIT */ NULL, 4959 /* WINED3DSIH_ENDIF */ shader_hw_endif, 4960 /* WINED3DSIH_ENDLOOP */ shader_hw_endloop, 4961 /* WINED3DSIH_ENDREP */ shader_hw_endrep, 4962 /* WINED3DSIH_EXP */ shader_hw_scalar_op, 4963 /* WINED3DSIH_EXPP */ shader_hw_scalar_op, 4964 /* WINED3DSIH_FRC */ shader_hw_map2gl, 4965 /* WINED3DSIH_IADD */ NULL, 4966 /* WINED3DSIH_IF */ NULL /* Hardcoded into the shader */, 4967 /* WINED3DSIH_IFC */ shader_hw_ifc, 4968 /* WINED3DSIH_IGE */ NULL, 4969 /* WINED3DSIH_IMUL */ NULL, 4970 /* WINED3DSIH_LABEL */ shader_hw_label, 4971 /* WINED3DSIH_LIT */ shader_hw_map2gl, 4972 /* WINED3DSIH_LOG */ shader_hw_log_pow, 4973 /* WINED3DSIH_LOGP */ shader_hw_log_pow, 4974 /* WINED3DSIH_LOOP */ shader_hw_loop, 4975 /* WINED3DSIH_LRP */ shader_hw_lrp, 4976 /* WINED3DSIH_LT */ NULL, 4977 /* WINED3DSIH_M3x2 */ shader_hw_mnxn, 4978 /* WINED3DSIH_M3x3 */ shader_hw_mnxn, 4979 /* WINED3DSIH_M3x4 */ shader_hw_mnxn, 4980 /* WINED3DSIH_M4x3 */ shader_hw_mnxn, 4981 /* WINED3DSIH_M4x4 */ shader_hw_mnxn, 4982 /* WINED3DSIH_MAD */ shader_hw_map2gl, 4983 /* WINED3DSIH_MAX */ shader_hw_map2gl, 4984 /* WINED3DSIH_MIN */ shader_hw_map2gl, 4985 /* WINED3DSIH_MOV */ shader_hw_mov, 4986 /* WINED3DSIH_MOVA */ shader_hw_mov, 4987 /* WINED3DSIH_MOVC */ NULL, 4988 /* WINED3DSIH_MUL */ shader_hw_map2gl, 4989 /* WINED3DSIH_NOP */ shader_hw_nop, 4990 /* WINED3DSIH_NRM */ shader_hw_nrm, 4991 /* WINED3DSIH_PHASE */ NULL, 4992 /* WINED3DSIH_POW */ shader_hw_log_pow, 4993 /* WINED3DSIH_RCP */ shader_hw_rcp, 4994 /* WINED3DSIH_REP */ shader_hw_rep, 4995 /* WINED3DSIH_RET */ shader_hw_ret, 4996 /* WINED3DSIH_RSQ */ shader_hw_scalar_op, 4997 /* WINED3DSIH_SETP */ NULL, 4998 /* WINED3DSIH_SGE */ shader_hw_map2gl, 4999 /* WINED3DSIH_SGN */ shader_hw_sgn, 5000 /* WINED3DSIH_SINCOS 
*/ shader_hw_sincos, 5001 /* WINED3DSIH_SLT */ shader_hw_map2gl, 5002 /* WINED3DSIH_SUB */ shader_hw_map2gl, 5003 /* WINED3DSIH_TEX */ pshader_hw_tex, 5004 /* WINED3DSIH_TEXBEM */ pshader_hw_texbem, 5005 /* WINED3DSIH_TEXBEML */ pshader_hw_texbem, 5006 /* WINED3DSIH_TEXCOORD */ pshader_hw_texcoord, 5007 /* WINED3DSIH_TEXDEPTH */ pshader_hw_texdepth, 5008 /* WINED3DSIH_TEXDP3 */ pshader_hw_texdp3, 5009 /* WINED3DSIH_TEXDP3TEX */ pshader_hw_texdp3tex, 5010 /* WINED3DSIH_TEXKILL */ pshader_hw_texkill, 5011 /* WINED3DSIH_TEXLDD */ shader_hw_texldd, 5012 /* WINED3DSIH_TEXLDL */ shader_hw_texldl, 5013 /* WINED3DSIH_TEXM3x2DEPTH */ pshader_hw_texm3x2depth, 5014 /* WINED3DSIH_TEXM3x2PAD */ pshader_hw_texm3x2pad, 5015 /* WINED3DSIH_TEXM3x2TEX */ pshader_hw_texm3x2tex, 5016 /* WINED3DSIH_TEXM3x3 */ pshader_hw_texm3x3, 5017 /* WINED3DSIH_TEXM3x3DIFF */ NULL, 5018 /* WINED3DSIH_TEXM3x3PAD */ pshader_hw_texm3x3pad, 5019 /* WINED3DSIH_TEXM3x3SPEC */ pshader_hw_texm3x3spec, 5020 /* WINED3DSIH_TEXM3x3TEX */ pshader_hw_texm3x3tex, 5021 /* WINED3DSIH_TEXM3x3VSPEC */ pshader_hw_texm3x3vspec, 5022 /* WINED3DSIH_TEXREG2AR */ pshader_hw_texreg2ar, 5023 /* WINED3DSIH_TEXREG2GB */ pshader_hw_texreg2gb, 5024 /* WINED3DSIH_TEXREG2RGB */ pshader_hw_texreg2rgb, 5025 }; 5026 5027 static inline BOOL get_bool_const(const struct wined3d_shader_instruction *ins, IWineD3DBaseShaderImpl *This, DWORD idx) 5028 { 5029 BOOL vshader = shader_is_vshader_version(This->baseShader.reg_maps.shader_version.type); 5030 WORD bools = 0; 5031 WORD flag = (1 << idx); 5032 const local_constant *constant; 5033 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5034 5035 if(This->baseShader.reg_maps.local_bool_consts & flag) 5036 { 5037 /* What good is a if(bool) with a hardcoded local constant? 
I don't know, but handle it */ 5038 LIST_FOR_EACH_ENTRY(constant, &This->baseShader.constantsB, local_constant, entry) 5039 { 5040 if (constant->idx == idx) 5041 { 5042 return constant->value[0]; 5043 } 5044 } 5045 ERR("Local constant not found\n"); 5046 return FALSE; 5047 } 5048 else 5049 { 5050 if(vshader) bools = priv->cur_vs_args->clip.boolclip.bools; 5051 else bools = priv->cur_ps_args->bools; 5052 return bools & flag; 5053 } 5054 } 5055 5056 static void get_loop_control_const(const struct wined3d_shader_instruction *ins, 5057 IWineD3DBaseShaderImpl *This, UINT idx, struct wined3d_shader_loop_control *loop_control) 5058 { 5059 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5060 5061 /* Integer constants can either be a local constant, or they can be stored in the shader 5062 * type specific compile args. */ 5063 if (This->baseShader.reg_maps.local_int_consts & (1 << idx)) 5064 { 5065 const local_constant *constant; 5066 5067 LIST_FOR_EACH_ENTRY(constant, &This->baseShader.constantsI, local_constant, entry) 5068 { 5069 if (constant->idx == idx) 5070 { 5071 loop_control->count = constant->value[0]; 5072 loop_control->start = constant->value[1]; 5073 /* Step is signed. */ 5074 loop_control->step = (int)constant->value[2]; 5075 return; 5076 } 5077 } 5078 /* If this happens the flag was set incorrectly */ 5079 ERR("Local constant not found\n"); 5080 loop_control->count = 0; 5081 loop_control->start = 0; 5082 loop_control->step = 0; 5083 return; 5084 } 5085 5086 switch (This->baseShader.reg_maps.shader_version.type) 5087 { 5088 case WINED3D_SHADER_TYPE_VERTEX: 5089 /* Count and aL start value are unsigned */ 5090 loop_control->count = priv->cur_vs_args->loop_ctrl[idx][0]; 5091 loop_control->start = priv->cur_vs_args->loop_ctrl[idx][1]; 5092 /* Step is signed. 
*/ 5093 loop_control->step = ((char)priv->cur_vs_args->loop_ctrl[idx][2]); 5094 break; 5095 5096 case WINED3D_SHADER_TYPE_PIXEL: 5097 loop_control->count = priv->cur_ps_args->loop_ctrl[idx][0]; 5098 loop_control->start = priv->cur_ps_args->loop_ctrl[idx][1]; 5099 loop_control->step = ((char)priv->cur_ps_args->loop_ctrl[idx][2]); 5100 break; 5101 5102 default: 5103 FIXME("Unhandled shader type %#x.\n", This->baseShader.reg_maps.shader_version.type); 5104 break; 5105 } 5106 } 5107 5108 static void record_instruction(struct list *list, const struct wined3d_shader_instruction *ins) 5109 { 5110 unsigned int i; 5111 struct wined3d_shader_dst_param *dst_param = NULL; 5112 struct wined3d_shader_src_param *src_param = NULL, *rel_addr = NULL; 5113 struct recorded_instruction *rec = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*rec)); 5114 if(!rec) 5115 { 5116 ERR("Out of memory\n"); 5117 return; 5118 } 5119 5120 rec->ins = *ins; 5121 dst_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*dst_param)); 5122 if(!dst_param) goto free; 5123 *dst_param = *ins->dst; 5124 if(ins->dst->reg.rel_addr) 5125 { 5126 rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*dst_param->reg.rel_addr)); 5127 if(!rel_addr) goto free; 5128 *rel_addr = *ins->dst->reg.rel_addr; 5129 dst_param->reg.rel_addr = rel_addr; 5130 } 5131 rec->ins.dst = dst_param; 5132 5133 src_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*src_param) * ins->src_count); 5134 if(!src_param) goto free; 5135 for(i = 0; i < ins->src_count; i++) 5136 { 5137 src_param[i] = ins->src[i]; 5138 if(ins->src[i].reg.rel_addr) 5139 { 5140 rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*rel_addr)); 5141 if(!rel_addr) goto free; 5142 *rel_addr = *ins->src[i].reg.rel_addr; 5143 src_param[i].reg.rel_addr = rel_addr; 5144 } 5145 } 5146 rec->ins.src = src_param; 5147 list_add_tail(list, &rec->entry); 5148 return; 5149 5150 free: 5151 ERR("Out of memory\n"); 5152 if(dst_param) 5153 { 5154 HeapFree(GetProcessHeap(), 0, (void *) 
dst_param->reg.rel_addr); 5155 HeapFree(GetProcessHeap(), 0, dst_param); 5156 } 5157 if(src_param) 5158 { 5159 for(i = 0; i < ins->src_count; i++) 5160 { 5161 HeapFree(GetProcessHeap(), 0, (void *) src_param[i].reg.rel_addr); 5162 } 5163 HeapFree(GetProcessHeap(), 0, src_param); 5164 } 5165 HeapFree(GetProcessHeap(), 0, rec); 5166 } 5167 5168 static void free_recorded_instruction(struct list *list) 5169 { 5170 struct recorded_instruction *rec_ins, *entry2; 5171 unsigned int i; 5172 5173 LIST_FOR_EACH_ENTRY_SAFE(rec_ins, entry2, list, struct recorded_instruction, entry) 5174 { 5175 list_remove(&rec_ins->entry); 5176 if(rec_ins->ins.dst) 5177 { 5178 HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.dst->reg.rel_addr); 5179 HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.dst); 5180 } 5181 if(rec_ins->ins.src) 5182 { 5183 for(i = 0; i < rec_ins->ins.src_count; i++) 5184 { 5185 HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.src[i].reg.rel_addr); 5186 } 5187 HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.src); 5188 } 5189 HeapFree(GetProcessHeap(), 0, rec_ins); 5190 } 5191 } 5192 5193 static void shader_arb_handle_instruction(const struct wined3d_shader_instruction *ins) { 5194 SHADER_HANDLER hw_fct; 5195 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 5196 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader; 5197 struct control_frame *control_frame; 5198 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 5199 BOOL bool_const; 5200 5201 if(ins->handler_idx == WINED3DSIH_LOOP || ins->handler_idx == WINED3DSIH_REP) 5202 { 5203 control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame)); 5204 list_add_head(&priv->control_frames, &control_frame->entry); 5205 5206 if(ins->handler_idx == WINED3DSIH_LOOP) control_frame->type = LOOP; 5207 if(ins->handler_idx == WINED3DSIH_REP) control_frame->type = REP; 5208 5209 if(priv->target_version >= NV2) 5210 { 5211 control_frame->no.loop = priv->num_loops++; 
5212 priv->loop_depth++; 5213 } 5214 else 5215 { 5216 /* Don't bother recording when we're in a not used if branch */ 5217 if(priv->muted) 5218 { 5219 return; 5220 } 5221 5222 if(!priv->recording) 5223 { 5224 list_init(&priv->record); 5225 priv->recording = TRUE; 5226 control_frame->outer_loop = TRUE; 5227 get_loop_control_const(ins, This, ins->src[0].reg.idx, &control_frame->loop_control); 5228 return; /* Instruction is handled */ 5229 } 5230 /* Record this loop in the outer loop's recording */ 5231 } 5232 } 5233 else if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 5234 { 5235 if(priv->target_version >= NV2) 5236 { 5237 /* Nothing to do. The control frame is popped after the HW instr handler */ 5238 } 5239 else 5240 { 5241 struct list *e = list_head(&priv->control_frames); 5242 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5243 list_remove(&control_frame->entry); 5244 5245 if(control_frame->outer_loop) 5246 { 5247 unsigned int iteration; 5248 int aL = 0; 5249 struct list copy; 5250 5251 /* Turn off recording before playback */ 5252 priv->recording = FALSE; 5253 5254 /* Move the recorded instructions to a separate list and get them out of the private data 5255 * structure. 
If there are nested loops, the shader_arb_handle_instruction below will 5256 * be recorded again, thus priv->record might be overwritten 5257 */ 5258 list_init(©); 5259 list_move_tail(©, &priv->record); 5260 list_init(&priv->record); 5261 5262 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5263 { 5264 shader_addline(buffer, "#unrolling loop: %u iterations, aL=%u, inc %d\n", 5265 control_frame->loop_control.count, control_frame->loop_control.start, 5266 control_frame->loop_control.step); 5267 aL = control_frame->loop_control.start; 5268 } 5269 else 5270 { 5271 shader_addline(buffer, "#unrolling rep: %u iterations\n", control_frame->loop_control.count); 5272 } 5273 5274 for (iteration = 0; iteration < control_frame->loop_control.count; ++iteration) 5275 { 5276 struct recorded_instruction *rec_ins; 5277 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5278 { 5279 priv->aL = aL; 5280 shader_addline(buffer, "#Iteration %u, aL=%d\n", iteration, aL); 5281 } 5282 else 5283 { 5284 shader_addline(buffer, "#Iteration %u\n", iteration); 5285 } 5286 5287 LIST_FOR_EACH_ENTRY(rec_ins, ©, struct recorded_instruction, entry) 5288 { 5289 shader_arb_handle_instruction(&rec_ins->ins); 5290 } 5291 5292 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 5293 { 5294 aL += control_frame->loop_control.step; 5295 } 5296 } 5297 shader_addline(buffer, "#end loop/rep\n"); 5298 5299 free_recorded_instruction(©); 5300 HeapFree(GetProcessHeap(), 0, control_frame); 5301 return; /* Instruction is handled */ 5302 } 5303 else 5304 { 5305 /* This is a nested loop. 
Proceed to the normal recording function */ 5306 HeapFree(GetProcessHeap(), 0, control_frame); 5307 } 5308 } 5309 } 5310 5311 if(priv->recording) 5312 { 5313 record_instruction(&priv->record, ins); 5314 return; 5315 } 5316 5317 /* boolean if */ 5318 if(ins->handler_idx == WINED3DSIH_IF) 5319 { 5320 control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame)); 5321 list_add_head(&priv->control_frames, &control_frame->entry); 5322 control_frame->type = IF; 5323 5324 bool_const = get_bool_const(ins, This, ins->src[0].reg.idx); 5325 if(ins->src[0].modifiers == WINED3DSPSM_NOT) bool_const = !bool_const; 5326 if (!priv->muted && !bool_const) 5327 { 5328 shader_addline(buffer, "#if(FALSE){\n"); 5329 priv->muted = TRUE; 5330 control_frame->muting = TRUE; 5331 } 5332 else shader_addline(buffer, "#if(TRUE) {\n"); 5333 5334 return; /* Instruction is handled */ 5335 } 5336 else if(ins->handler_idx == WINED3DSIH_IFC) 5337 { 5338 /* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */ 5339 control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame)); 5340 control_frame->type = IFC; 5341 control_frame->no.ifc = priv->num_ifcs++; 5342 list_add_head(&priv->control_frames, &control_frame->entry); 5343 } 5344 else if(ins->handler_idx == WINED3DSIH_ELSE) 5345 { 5346 struct list *e = list_head(&priv->control_frames); 5347 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5348 5349 if(control_frame->type == IF) 5350 { 5351 shader_addline(buffer, "#} else {\n"); 5352 if(!priv->muted && !control_frame->muting) 5353 { 5354 priv->muted = TRUE; 5355 control_frame->muting = TRUE; 5356 } 5357 else if(control_frame->muting) priv->muted = FALSE; 5358 return; /* Instruction is handled. 
*/ 5359 } 5360 /* In case of an ifc, generate a HW shader instruction */ 5361 } 5362 else if(ins->handler_idx == WINED3DSIH_ENDIF) 5363 { 5364 struct list *e = list_head(&priv->control_frames); 5365 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5366 5367 if(control_frame->type == IF) 5368 { 5369 shader_addline(buffer, "#} endif\n"); 5370 if(control_frame->muting) priv->muted = FALSE; 5371 list_remove(&control_frame->entry); 5372 HeapFree(GetProcessHeap(), 0, control_frame); 5373 return; /* Instruction is handled */ 5374 } 5375 } 5376 5377 if(priv->muted) return; 5378 5379 /* Select handler */ 5380 hw_fct = shader_arb_instruction_handler_table[ins->handler_idx]; 5381 5382 /* Unhandled opcode */ 5383 if (!hw_fct) 5384 { 5385 FIXME("Backend can't handle opcode %#x\n", ins->handler_idx); 5386 return; 5387 } 5388 hw_fct(ins); 5389 5390 if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 5391 { 5392 struct list *e = list_head(&priv->control_frames); 5393 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5394 list_remove(&control_frame->entry); 5395 HeapFree(GetProcessHeap(), 0, control_frame); 5396 priv->loop_depth--; 5397 } 5398 else if(ins->handler_idx == WINED3DSIH_ENDIF) 5399 { 5400 /* Non-ifc ENDIFs don't reach that place because of the return in the if block above */ 5401 struct list *e = list_head(&priv->control_frames); 5402 control_frame = LIST_ENTRY(e, struct control_frame, entry); 5403 list_remove(&control_frame->entry); 5404 HeapFree(GetProcessHeap(), 0, control_frame); 5405 } 5406 5407 5408 shader_arb_add_instruction_modifiers(ins); 5409 } 5410 5411 const shader_backend_t arb_program_shader_backend = { 5412 shader_arb_handle_instruction, 5413 shader_arb_select, 5414 shader_arb_select_depth_blt, 5415 shader_arb_deselect_depth_blt, 5416 shader_arb_update_float_vertex_constants, 5417 shader_arb_update_float_pixel_constants, 5418 shader_arb_load_constants, 5419 shader_arb_load_np2fixup_constants, 5420 
shader_arb_destroy, 5421 shader_arb_alloc, 5422 shader_arb_free, 5423 shader_arb_dirty_const, 5424 shader_arb_get_caps, 5425 shader_arb_color_fixup_supported, 5426 }; 5427 5428 /* ARB_fragment_program fixed function pipeline replacement definitions */ 5429 #define ARB_FFP_CONST_TFACTOR 0 5430 #define ARB_FFP_CONST_SPECULAR_ENABLE ((ARB_FFP_CONST_TFACTOR) + 1) 5431 #define ARB_FFP_CONST_CONSTANT(i) ((ARB_FFP_CONST_SPECULAR_ENABLE) + 1 + i) 5432 #define ARB_FFP_CONST_BUMPMAT(i) ((ARB_FFP_CONST_CONSTANT(7)) + 1 + i) 5433 #define ARB_FFP_CONST_LUMINANCE(i) ((ARB_FFP_CONST_BUMPMAT(7)) + 1 + i) 5434 5435 struct arbfp_ffp_desc 5436 { 5437 struct ffp_frag_desc parent; 5438 GLuint shader; 5439 unsigned int num_textures_used; 5440 }; 5441 5442 /* Context activation is done by the caller. */ 5443 static void arbfp_enable(IWineD3DDevice *iface, BOOL enable) { 5444 ENTER_GL(); 5445 if(enable) { 5446 glEnable(GL_FRAGMENT_PROGRAM_ARB); 5447 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 5448 } else { 5449 glDisable(GL_FRAGMENT_PROGRAM_ARB); 5450 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 5451 } 5452 LEAVE_GL(); 5453 } 5454 5455 static HRESULT arbfp_alloc(IWineD3DDevice *iface) { 5456 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *) iface; 5457 struct shader_arb_priv *priv; 5458 /* Share private data between the shader backend and the pipeline replacement, if both 5459 * are the arb implementation. 
This is needed to figure out whether ARBfp should be disabled 5460 * if no pixel shader is bound or not 5461 */ 5462 if(This->shader_backend == &arb_program_shader_backend) { 5463 This->fragment_priv = This->shader_priv; 5464 } else { 5465 This->fragment_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_arb_priv)); 5466 if(!This->fragment_priv) return E_OUTOFMEMORY; 5467 } 5468 priv = This->fragment_priv; 5469 if (wine_rb_init(&priv->fragment_shaders, &wined3d_ffp_frag_program_rb_functions) == -1) 5470 { 5471 ERR("Failed to initialize rbtree.\n"); 5472 HeapFree(GetProcessHeap(), 0, This->fragment_priv); 5473 return E_OUTOFMEMORY; 5474 } 5475 priv->use_arbfp_fixed_func = TRUE; 5476 return WINED3D_OK; 5477 } 5478 5479 /* Context activation is done by the caller. */ 5480 static void arbfp_free_ffpshader(struct wine_rb_entry *entry, void *context) 5481 { 5482 const struct wined3d_gl_info *gl_info = context; 5483 struct arbfp_ffp_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_ffp_desc, parent.entry); 5484 5485 ENTER_GL(); 5486 GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader)); 5487 checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)"); 5488 HeapFree(GetProcessHeap(), 0, entry_arb); 5489 LEAVE_GL(); 5490 } 5491 5492 /* Context activation is done by the caller. 
*/ 5493 static void arbfp_free(IWineD3DDevice *iface) { 5494 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *) iface; 5495 struct shader_arb_priv *priv = This->fragment_priv; 5496 5497 wine_rb_destroy(&priv->fragment_shaders, arbfp_free_ffpshader, &This->adapter->gl_info); 5498 priv->use_arbfp_fixed_func = FALSE; 5499 5500 if(This->shader_backend != &arb_program_shader_backend) { 5501 HeapFree(GetProcessHeap(), 0, This->fragment_priv); 5502 } 5503 } 5504 5505 static void arbfp_get_caps(const struct wined3d_gl_info *gl_info, struct fragment_caps *caps) 5506 { 5507 caps->PrimitiveMiscCaps = WINED3DPMISCCAPS_TSSARGTEMP; 5508 caps->TextureOpCaps = WINED3DTEXOPCAPS_DISABLE | 5509 WINED3DTEXOPCAPS_SELECTARG1 | 5510 WINED3DTEXOPCAPS_SELECTARG2 | 5511 WINED3DTEXOPCAPS_MODULATE4X | 5512 WINED3DTEXOPCAPS_MODULATE2X | 5513 WINED3DTEXOPCAPS_MODULATE | 5514 WINED3DTEXOPCAPS_ADDSIGNED2X | 5515 WINED3DTEXOPCAPS_ADDSIGNED | 5516 WINED3DTEXOPCAPS_ADD | 5517 WINED3DTEXOPCAPS_SUBTRACT | 5518 WINED3DTEXOPCAPS_ADDSMOOTH | 5519 WINED3DTEXOPCAPS_BLENDCURRENTALPHA | 5520 WINED3DTEXOPCAPS_BLENDFACTORALPHA | 5521 WINED3DTEXOPCAPS_BLENDTEXTUREALPHA | 5522 WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA | 5523 WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM | 5524 WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR | 5525 WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA | 5526 WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA | 5527 WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR | 5528 WINED3DTEXOPCAPS_DOTPRODUCT3 | 5529 WINED3DTEXOPCAPS_MULTIPLYADD | 5530 WINED3DTEXOPCAPS_LERP | 5531 WINED3DTEXOPCAPS_BUMPENVMAP | 5532 WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE; 5533 5534 /* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */ 5535 5536 caps->MaxTextureBlendStages = 8; 5537 caps->MaxSimultaneousTextures = min(gl_info->limits.fragment_samplers, 8); 5538 } 5539 5540 static void state_texfactor_arbfp(DWORD state_id, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context) 5541 { 5542 const struct wined3d_gl_info *gl_info = context->gl_info; 5543 
const struct wined3d_state *state = &stateblock->state; 5544 IWineD3DDeviceImpl *device = stateblock->device; 5545 float col[4]; 5546 5547 /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite 5548 * application provided constants 5549 */ 5550 if (device->shader_backend == &arb_program_shader_backend) 5551 { 5552 if (use_ps(state)) return; 5553 5554 context->pshader_const_dirty[ARB_FFP_CONST_TFACTOR] = 1; 5555 device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_TFACTOR + 1); 5556 } 5557 5558 D3DCOLORTOGLFLOAT4(state->render_states[WINED3DRS_TEXTUREFACTOR], col); 5559 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, col)); 5560 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, col)"); 5561 5562 } 5563 5564 static void state_arb_specularenable(DWORD state_id, 5565 IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context) 5566 { 5567 const struct wined3d_gl_info *gl_info = context->gl_info; 5568 const struct wined3d_state *state = &stateblock->state; 5569 IWineD3DDeviceImpl *device = stateblock->device; 5570 float col[4]; 5571 5572 /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite 5573 * application provided constants 5574 */ 5575 if (device->shader_backend == &arb_program_shader_backend) 5576 { 5577 if (use_ps(state)) return; 5578 5579 context->pshader_const_dirty[ARB_FFP_CONST_SPECULAR_ENABLE] = 1; 5580 device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_SPECULAR_ENABLE + 1); 5581 } 5582 5583 if (state->render_states[WINED3DRS_SPECULARENABLE]) 5584 { 5585 /* The specular color has no alpha */ 5586 col[0] = 1.0f; col[1] = 1.0f; 5587 col[2] = 1.0f; col[3] = 0.0f; 5588 } else { 5589 col[0] = 0.0f; col[1] = 0.0f; 5590 col[2] = 0.0f; col[3] = 0.0f; 5591 } 5592 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 
ARB_FFP_CONST_SPECULAR_ENABLE, col)); 5593 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)"); 5594 } 5595 5596 static void set_bumpmat_arbfp(DWORD state_id, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context) 5597 { 5598 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5599 const struct wined3d_gl_info *gl_info = context->gl_info; 5600 const struct wined3d_state *state = &stateblock->state; 5601 IWineD3DDeviceImpl *device = stateblock->device; 5602 float mat[2][2]; 5603 5604 if (use_ps(state)) 5605 { 5606 IWineD3DPixelShaderImpl *ps = state->pixel_shader; 5607 if (stage && (ps->baseShader.reg_maps.bumpmat & (1 << stage))) 5608 { 5609 /* The pixel shader has to know the bump env matrix. Do a constants update if it isn't scheduled 5610 * anyway 5611 */ 5612 if (!isStateDirty(context, STATE_PIXELSHADERCONSTANT)) 5613 stateblock_apply_state(STATE_PIXELSHADERCONSTANT, stateblock, context); 5614 } 5615 5616 if(device->shader_backend == &arb_program_shader_backend) { 5617 /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */ 5618 return; 5619 } 5620 } else if(device->shader_backend == &arb_program_shader_backend) { 5621 context->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1; 5622 device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1); 5623 } 5624 5625 mat[0][0] = *((float *)&state->texture_states[stage][WINED3DTSS_BUMPENVMAT00]); 5626 mat[0][1] = *((float *)&state->texture_states[stage][WINED3DTSS_BUMPENVMAT01]); 5627 mat[1][0] = *((float *)&state->texture_states[stage][WINED3DTSS_BUMPENVMAT10]); 5628 mat[1][1] = *((float *)&state->texture_states[stage][WINED3DTSS_BUMPENVMAT11]); 5629 5630 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])); 5631 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 
ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])"); 5632 } 5633 5634 static void tex_bumpenvlum_arbfp(DWORD state_id, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context) 5635 { 5636 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 5637 const struct wined3d_gl_info *gl_info = context->gl_info; 5638 const struct wined3d_state *state = &stateblock->state; 5639 IWineD3DDeviceImpl *device = stateblock->device; 5640 float param[4]; 5641 5642 if (use_ps(state)) 5643 { 5644 IWineD3DPixelShaderImpl *ps = state->pixel_shader; 5645 if (stage && (ps->baseShader.reg_maps.luminanceparams & (1 << stage))) 5646 { 5647 /* The pixel shader has to know the luminance offset. Do a constants update if it 5648 * isn't scheduled anyway 5649 */ 5650 if (!isStateDirty(context, STATE_PIXELSHADERCONSTANT)) 5651 stateblock_apply_state(STATE_PIXELSHADERCONSTANT, stateblock, context); 5652 } 5653 5654 if(device->shader_backend == &arb_program_shader_backend) { 5655 /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */ 5656 return; 5657 } 5658 } else if(device->shader_backend == &arb_program_shader_backend) { 5659 context->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1; 5660 device->highest_dirty_ps_const = max(device->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1); 5661 } 5662 5663 param[0] = *((float *)&state->texture_states[stage][WINED3DTSS_BUMPENVLSCALE]); 5664 param[1] = *((float *)&state->texture_states[stage][WINED3DTSS_BUMPENVLOFFSET]); 5665 param[2] = 0.0f; 5666 param[3] = 0.0f; 5667 5668 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)); 5669 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)"); 5670 } 5671 5672 static const char *get_argreg(struct wined3d_shader_buffer *buffer, DWORD argnum, unsigned int stage, DWORD arg) 5673 { 5674 const char *ret; 5675 5676 
if(arg == ARG_UNUSED) return "unused"; /* This is the marker for unused registers */ 5677 5678 switch(arg & WINED3DTA_SELECTMASK) { 5679 case WINED3DTA_DIFFUSE: 5680 ret = "fragment.color.primary"; break; 5681 5682 case WINED3DTA_CURRENT: 5683 if (!stage) ret = "fragment.color.primary"; 5684 else ret = "ret"; 5685 break; 5686 5687 case WINED3DTA_TEXTURE: 5688 switch(stage) { 5689 case 0: ret = "tex0"; break; 5690 case 1: ret = "tex1"; break; 5691 case 2: ret = "tex2"; break; 5692 case 3: ret = "tex3"; break; 5693 case 4: ret = "tex4"; break; 5694 case 5: ret = "tex5"; break; 5695 case 6: ret = "tex6"; break; 5696 case 7: ret = "tex7"; break; 5697 default: ret = "unknown texture"; 5698 } 5699 break; 5700 5701 case WINED3DTA_TFACTOR: 5702 ret = "tfactor"; break; 5703 5704 case WINED3DTA_SPECULAR: 5705 ret = "fragment.color.secondary"; break; 5706 5707 case WINED3DTA_TEMP: 5708 ret = "tempreg"; break; 5709 5710 case WINED3DTA_CONSTANT: 5711 FIXME("Implement perstage constants\n"); 5712 switch(stage) { 5713 case 0: ret = "const0"; break; 5714 case 1: ret = "const1"; break; 5715 case 2: ret = "const2"; break; 5716 case 3: ret = "const3"; break; 5717 case 4: ret = "const4"; break; 5718 case 5: ret = "const5"; break; 5719 case 6: ret = "const6"; break; 5720 case 7: ret = "const7"; break; 5721 default: ret = "unknown constant"; 5722 } 5723 break; 5724 5725 default: 5726 return "unknown"; 5727 } 5728 5729 if(arg & WINED3DTA_COMPLEMENT) { 5730 shader_addline(buffer, "SUB arg%u, const.x, %s;\n", argnum, ret); 5731 if(argnum == 0) ret = "arg0"; 5732 if(argnum == 1) ret = "arg1"; 5733 if(argnum == 2) ret = "arg2"; 5734 } 5735 if(arg & WINED3DTA_ALPHAREPLICATE) { 5736 shader_addline(buffer, "MOV arg%u, %s.w;\n", argnum, ret); 5737 if(argnum == 0) ret = "arg0"; 5738 if(argnum == 1) ret = "arg1"; 5739 if(argnum == 2) ret = "arg2"; 5740 } 5741 return ret; 5742 } 5743 5744 static void gen_ffp_instr(struct wined3d_shader_buffer *buffer, unsigned int stage, BOOL color, 5745 BOOL 
alpha, DWORD dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2) 5746 { 5747 const char *dstmask, *dstreg, *arg0, *arg1, *arg2; 5748 unsigned int mul = 1; 5749 BOOL mul_final_dest = FALSE; 5750 5751 if(color && alpha) dstmask = ""; 5752 else if(color) dstmask = ".xyz"; 5753 else dstmask = ".w"; 5754 5755 if(dst == tempreg) dstreg = "tempreg"; 5756 else dstreg = "ret"; 5757 5758 arg0 = get_argreg(buffer, 0, stage, dw_arg0); 5759 arg1 = get_argreg(buffer, 1, stage, dw_arg1); 5760 arg2 = get_argreg(buffer, 2, stage, dw_arg2); 5761 5762 switch(op) { 5763 case WINED3DTOP_DISABLE: 5764 if (!stage) shader_addline(buffer, "MOV %s%s, fragment.color.primary;\n", dstreg, dstmask); 5765 break; 5766 5767 case WINED3DTOP_SELECTARG2: 5768 arg1 = arg2; 5769 case WINED3DTOP_SELECTARG1: 5770 shader_addline(buffer, "MOV %s%s, %s;\n", dstreg, dstmask, arg1); 5771 break; 5772 5773 case WINED3DTOP_MODULATE4X: 5774 mul = 2; 5775 case WINED3DTOP_MODULATE2X: 5776 mul *= 2; 5777 if (!strcmp(dstreg, "result.color")) 5778 { 5779 dstreg = "ret"; 5780 mul_final_dest = TRUE; 5781 } 5782 case WINED3DTOP_MODULATE: 5783 shader_addline(buffer, "MUL %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 5784 break; 5785 5786 case WINED3DTOP_ADDSIGNED2X: 5787 mul = 2; 5788 if (!strcmp(dstreg, "result.color")) 5789 { 5790 dstreg = "ret"; 5791 mul_final_dest = TRUE; 5792 } 5793 case WINED3DTOP_ADDSIGNED: 5794 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 5795 arg2 = "arg2"; 5796 case WINED3DTOP_ADD: 5797 shader_addline(buffer, "ADD_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 5798 break; 5799 5800 case WINED3DTOP_SUBTRACT: 5801 shader_addline(buffer, "SUB_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 5802 break; 5803 5804 case WINED3DTOP_ADDSMOOTH: 5805 shader_addline(buffer, "SUB arg1, const.x, %s;\n", arg1); 5806 shader_addline(buffer, "MAD_SAT %s%s, arg1, %s, %s;\n", dstreg, dstmask, arg2, arg1); 5807 break; 5808 5809 case WINED3DTOP_BLENDCURRENTALPHA: 5810 arg0 = 
get_argreg(buffer, 0, stage, WINED3DTA_CURRENT); 5811 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 5812 break; 5813 case WINED3DTOP_BLENDFACTORALPHA: 5814 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TFACTOR); 5815 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 5816 break; 5817 case WINED3DTOP_BLENDTEXTUREALPHA: 5818 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 5819 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 5820 break; 5821 case WINED3DTOP_BLENDDIFFUSEALPHA: 5822 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_DIFFUSE); 5823 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 5824 break; 5825 5826 case WINED3DTOP_BLENDTEXTUREALPHAPM: 5827 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 5828 shader_addline(buffer, "SUB arg0.w, const.x, %s.w;\n", arg0); 5829 shader_addline(buffer, "MAD_SAT %s%s, %s, arg0.w, %s;\n", dstreg, dstmask, arg2, arg1); 5830 break; 5831 5832 /* D3DTOP_PREMODULATE ???? 
*/ 5833 5834 case WINED3DTOP_MODULATEINVALPHA_ADDCOLOR: 5835 shader_addline(buffer, "SUB arg0.w, const.x, %s;\n", arg1); 5836 shader_addline(buffer, "MAD_SAT %s%s, arg0.w, %s, %s;\n", dstreg, dstmask, arg2, arg1); 5837 break; 5838 case WINED3DTOP_MODULATEALPHA_ADDCOLOR: 5839 shader_addline(buffer, "MAD_SAT %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg1); 5840 break; 5841 case WINED3DTOP_MODULATEINVCOLOR_ADDALPHA: 5842 shader_addline(buffer, "SUB arg0, const.x, %s;\n", arg1); 5843 shader_addline(buffer, "MAD_SAT %s%s, arg0, %s, %s.w;\n", dstreg, dstmask, arg2, arg1); 5844 break; 5845 case WINED3DTOP_MODULATECOLOR_ADDALPHA: 5846 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s.w;\n", dstreg, dstmask, arg1, arg2, arg1); 5847 break; 5848 5849 case WINED3DTOP_DOTPRODUCT3: 5850 mul = 4; 5851 if (!strcmp(dstreg, "result.color")) 5852 { 5853 dstreg = "ret"; 5854 mul_final_dest = TRUE; 5855 } 5856 shader_addline(buffer, "SUB arg1, %s, const.w;\n", arg1); 5857 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 5858 shader_addline(buffer, "DP3_SAT %s%s, arg1, arg2;\n", dstreg, dstmask); 5859 break; 5860 5861 case WINED3DTOP_MULTIPLYADD: 5862 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg0); 5863 break; 5864 5865 case WINED3DTOP_LERP: 5866 /* The msdn is not quite right here */ 5867 shader_addline(buffer, "LRP %s%s, %s, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 5868 break; 5869 5870 case WINED3DTOP_BUMPENVMAP: 5871 case WINED3DTOP_BUMPENVMAPLUMINANCE: 5872 /* Those are handled in the first pass of the shader(generation pass 1 and 2) already */ 5873 break; 5874 5875 default: 5876 FIXME("Unhandled texture op %08x\n", op); 5877 } 5878 5879 if(mul == 2) { 5880 shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", mul_final_dest ? "result.color" : dstreg, dstmask, dstreg); 5881 } else if(mul == 4) { 5882 shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", mul_final_dest ? 
"result.color" : dstreg, dstmask, dstreg); 5883 } 5884 } 5885 5886 static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, IWineD3DStateBlockImpl *stateblock) 5887 { 5888 const struct wined3d_gl_info *gl_info = &stateblock->device->adapter->gl_info; 5889 unsigned int stage; 5890 struct wined3d_shader_buffer buffer; 5891 BOOL tex_read[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE}; 5892 BOOL bump_used[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE}; 5893 BOOL luminance_used[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE}; 5894 const char *textype; 5895 const char *instr, *sat; 5896 char colorcor_dst[8]; 5897 GLuint ret; 5898 DWORD arg0, arg1, arg2; 5899 BOOL tempreg_used = FALSE, tfactor_used = FALSE; 5900 BOOL op_equal; 5901 const char *final_combiner_src = "ret"; 5902 GLint pos; 5903 5904 /* Find out which textures are read */ 5905 for(stage = 0; stage < MAX_TEXTURES; stage++) { 5906 if(settings->op[stage].cop == WINED3DTOP_DISABLE) break; 5907 arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK; 5908 arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK; 5909 arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK; 5910 if(arg0 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 5911 if(arg1 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 5912 if(arg2 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 5913 5914 if(settings->op[stage].cop == WINED3DTOP_BLENDTEXTUREALPHA) tex_read[stage] = TRUE; 5915 if(settings->op[stage].cop == WINED3DTOP_BLENDTEXTUREALPHAPM) tex_read[stage] = TRUE; 5916 if(settings->op[stage].cop == WINED3DTOP_BUMPENVMAP) { 5917 bump_used[stage] = TRUE; 5918 tex_read[stage] = TRUE; 5919 } 5920 if(settings->op[stage].cop == WINED3DTOP_BUMPENVMAPLUMINANCE) { 5921 bump_used[stage] = TRUE; 5922 tex_read[stage] = TRUE; 5923 luminance_used[stage] = TRUE; 5924 } else if(settings->op[stage].cop == WINED3DTOP_BLENDFACTORALPHA) { 5925 tfactor_used = TRUE; 5926 } 5927 
5928 if(arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) { 5929 tfactor_used = TRUE; 5930 } 5931 5932 if(settings->op[stage].dst == tempreg) tempreg_used = TRUE; 5933 if(arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) { 5934 tempreg_used = TRUE; 5935 } 5936 5937 if(settings->op[stage].aop == WINED3DTOP_DISABLE) continue; 5938 arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK; 5939 arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK; 5940 arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK; 5941 if(arg0 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 5942 if(arg1 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 5943 if(arg2 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 5944 5945 if(arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) { 5946 tempreg_used = TRUE; 5947 } 5948 if(arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) { 5949 tfactor_used = TRUE; 5950 } 5951 } 5952 5953 /* Shader header */ 5954 if (!shader_buffer_init(&buffer)) 5955 { 5956 ERR("Failed to initialize shader buffer.\n"); 5957 return 0; 5958 } 5959 5960 shader_addline(&buffer, "!!ARBfp1.0\n"); 5961 5962 switch(settings->fog) { 5963 case FOG_OFF: break; 5964 case FOG_LINEAR: shader_addline(&buffer, "OPTION ARB_fog_linear;\n"); break; 5965 case FOG_EXP: shader_addline(&buffer, "OPTION ARB_fog_exp;\n"); break; 5966 case FOG_EXP2: shader_addline(&buffer, "OPTION ARB_fog_exp2;\n"); break; 5967 default: FIXME("Unexpected fog setting %d\n", settings->fog); 5968 } 5969 5970 shader_addline(&buffer, "PARAM const = {1, 2, 4, 0.5};\n"); 5971 shader_addline(&buffer, "TEMP TMP;\n"); 5972 shader_addline(&buffer, "TEMP ret;\n"); 5973 if(tempreg_used || settings->sRGB_write) shader_addline(&buffer, "TEMP tempreg;\n"); 5974 shader_addline(&buffer, "TEMP arg0;\n"); 5975 shader_addline(&buffer, "TEMP arg1;\n"); 5976 shader_addline(&buffer, "TEMP arg2;\n"); 5977 for(stage = 0; stage < 
MAX_TEXTURES; stage++) { 5978 if(!tex_read[stage]) continue; 5979 shader_addline(&buffer, "TEMP tex%u;\n", stage); 5980 if(!bump_used[stage]) continue; 5981 shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage)); 5982 if(!luminance_used[stage]) continue; 5983 shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage)); 5984 } 5985 if(tfactor_used) { 5986 shader_addline(&buffer, "PARAM tfactor = program.env[%u];\n", ARB_FFP_CONST_TFACTOR); 5987 } 5988 shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", ARB_FFP_CONST_SPECULAR_ENABLE); 5989 5990 if(settings->sRGB_write) { 5991 shader_addline(&buffer, "PARAM srgb_consts1 = {%f, %f, %f, %f};\n", 5992 srgb_mul_low, srgb_cmp, srgb_pow, srgb_mul_high); 5993 shader_addline(&buffer, "PARAM srgb_consts2 = {%f, %f, %f, %f};\n", 5994 srgb_sub_high, 0.0, 0.0, 0.0); 5995 } 5996 5997 if (ffp_clip_emul(&stateblock->state) && settings->emul_clipplanes) 5998 shader_addline(&buffer, "KIL fragment.texcoord[7];\n"); 5999 6000 /* Generate texture sampling instructions) */ 6001 for(stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3DTOP_DISABLE; stage++) { 6002 if(!tex_read[stage]) continue; 6003 6004 switch(settings->op[stage].tex_type) { 6005 case tex_1d: textype = "1D"; break; 6006 case tex_2d: textype = "2D"; break; 6007 case tex_3d: textype = "3D"; break; 6008 case tex_cube: textype = "CUBE"; break; 6009 case tex_rect: textype = "RECT"; break; 6010 default: textype = "unexpected_textype"; break; 6011 } 6012 6013 if(settings->op[stage].cop == WINED3DTOP_BUMPENVMAP || 6014 settings->op[stage].cop == WINED3DTOP_BUMPENVMAPLUMINANCE) { 6015 sat = ""; 6016 } else { 6017 sat = "_SAT"; 6018 } 6019 6020 if(settings->op[stage].projected == proj_none) { 6021 instr = "TEX"; 6022 } else if(settings->op[stage].projected == proj_count4 || 6023 settings->op[stage].projected == proj_count3) { 6024 instr = "TXP"; 6025 } else { 
6026 FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); 6027 instr = "TXP"; 6028 } 6029 6030 if(stage > 0 && 6031 (settings->op[stage - 1].cop == WINED3DTOP_BUMPENVMAP || 6032 settings->op[stage - 1].cop == WINED3DTOP_BUMPENVMAPLUMINANCE)) { 6033 shader_addline(&buffer, "SWZ arg1, bumpmat%u, x, z, 0, 0;\n", stage - 1); 6034 shader_addline(&buffer, "DP3 ret.x, arg1, tex%u;\n", stage - 1); 6035 shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1); 6036 shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1); 6037 6038 /* with projective textures, texbem only divides the static texture coord, not the displacement, 6039 * so multiply the displacement with the dividing parameter before passing it to TXP 6040 */ 6041 if (settings->op[stage].projected != proj_none) { 6042 if(settings->op[stage].projected == proj_count4) { 6043 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage); 6044 shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", stage, stage); 6045 } else { 6046 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage); 6047 shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", stage, stage); 6048 } 6049 } else { 6050 shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage); 6051 } 6052 6053 shader_addline(&buffer, "%s%s tex%u, ret, texture[%u], %s;\n", 6054 instr, sat, stage, stage, textype); 6055 if(settings->op[stage - 1].cop == WINED3DTOP_BUMPENVMAPLUMINANCE) { 6056 shader_addline(&buffer, "MAD_SAT ret.x, tex%u.z, luminance%u.x, luminance%u.y;\n", 6057 stage - 1, stage - 1, stage - 1); 6058 shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage); 6059 } 6060 } else if(settings->op[stage].projected == proj_count3) { 6061 shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage); 6062 shader_addline(&buffer, "MOV ret.w, ret.z;\n"); 6063 shader_addline(&buffer, 
"%s%s tex%u, ret, texture[%u], %s;\n", 6064 instr, sat, stage, stage, textype); 6065 } else { 6066 shader_addline(&buffer, "%s%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n", 6067 instr, sat, stage, stage, stage, textype); 6068 } 6069 6070 sprintf(colorcor_dst, "tex%u", stage); 6071 gen_color_correction(&buffer, colorcor_dst, WINED3DSP_WRITEMASK_ALL, "const.x", "const.y", 6072 settings->op[stage].color_fixup); 6073 } 6074 6075 /* Generate the main shader */ 6076 for (stage = 0; stage < MAX_TEXTURES; ++stage) 6077 { 6078 if (settings->op[stage].cop == WINED3DTOP_DISABLE) 6079 { 6080 if (!stage) final_combiner_src = "fragment.color.primary"; 6081 break; 6082 } 6083 6084 if(settings->op[stage].cop == WINED3DTOP_SELECTARG1 && 6085 settings->op[stage].aop == WINED3DTOP_SELECTARG1) { 6086 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1; 6087 } else if(settings->op[stage].cop == WINED3DTOP_SELECTARG1 && 6088 settings->op[stage].aop == WINED3DTOP_SELECTARG2) { 6089 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2; 6090 } else if(settings->op[stage].cop == WINED3DTOP_SELECTARG2 && 6091 settings->op[stage].aop == WINED3DTOP_SELECTARG1) { 6092 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1; 6093 } else if(settings->op[stage].cop == WINED3DTOP_SELECTARG2 && 6094 settings->op[stage].aop == WINED3DTOP_SELECTARG2) { 6095 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2; 6096 } else { 6097 op_equal = settings->op[stage].aop == settings->op[stage].cop && 6098 settings->op[stage].carg0 == settings->op[stage].aarg0 && 6099 settings->op[stage].carg1 == settings->op[stage].aarg1 && 6100 settings->op[stage].carg2 == settings->op[stage].aarg2; 6101 } 6102 6103 if(settings->op[stage].aop == WINED3DTOP_DISABLE) { 6104 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst, 6105 settings->op[stage].cop, settings->op[stage].carg0, 6106 settings->op[stage].carg1, settings->op[stage].carg2); 6107 if 
(!stage) 6108 shader_addline(&buffer, "MOV ret.w, fragment.color.primary.w;\n"); 6109 } 6110 else if (op_equal) 6111 { 6112 gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].dst, 6113 settings->op[stage].cop, settings->op[stage].carg0, 6114 settings->op[stage].carg1, settings->op[stage].carg2); 6115 } else { 6116 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst, 6117 settings->op[stage].cop, settings->op[stage].carg0, 6118 settings->op[stage].carg1, settings->op[stage].carg2); 6119 gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].dst, 6120 settings->op[stage].aop, settings->op[stage].aarg0, 6121 settings->op[stage].aarg1, settings->op[stage].aarg2); 6122 } 6123 } 6124 6125 if(settings->sRGB_write) { 6126 shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src); 6127 arbfp_add_sRGB_correction(&buffer, "ret", "arg0", "arg1", "arg2", "tempreg", FALSE); 6128 shader_addline(&buffer, "MOV result.color, ret;\n"); 6129 } else { 6130 shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src); 6131 } 6132 6133 /* Footer */ 6134 shader_addline(&buffer, "END\n"); 6135 6136 /* Generate the shader */ 6137 GL_EXTCALL(glGenProgramsARB(1, &ret)); 6138 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ret)); 6139 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 6140 strlen(buffer.buffer), buffer.buffer)); 6141 checkGLcall("glProgramStringARB()"); 6142 6143 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 6144 if (pos != -1) 6145 { 6146 FIXME("Fragment program error at position %d: %s\n\n", pos, 6147 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 6148 shader_arb_dump_program_source(buffer.buffer); 6149 } 6150 else 6151 { 6152 GLint native; 6153 6154 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 6155 
checkGLcall("glGetProgramivARB()"); 6156 if (!native) WARN("Program exceeds native resource limits.\n"); 6157 } 6158 6159 shader_buffer_free(&buffer); 6160 return ret; 6161 } 6162 6163 static void fragment_prog_arbfp(DWORD state_id, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context) 6164 { 6165 const struct wined3d_gl_info *gl_info = context->gl_info; 6166 const struct wined3d_state *state = &stateblock->state; 6167 IWineD3DDeviceImpl *device = stateblock->device; 6168 struct shader_arb_priv *priv = device->fragment_priv; 6169 BOOL use_vshader = use_vs(state); 6170 BOOL use_pshader = use_ps(state); 6171 struct ffp_frag_settings settings; 6172 const struct arbfp_ffp_desc *desc; 6173 unsigned int i; 6174 6175 TRACE("state_id %#x, stateblock %p, context %p\n", state_id, stateblock, context); 6176 6177 if(isStateDirty(context, STATE_RENDER(WINED3DRS_FOGENABLE))) { 6178 if(!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) { 6179 /* Reload fixed function constants since they collide with the pixel shader constants */ 6180 for(i = 0; i < MAX_TEXTURES; i++) { 6181 set_bumpmat_arbfp(STATE_TEXTURESTAGE(i, WINED3DTSS_BUMPENVMAT00), stateblock, context); 6182 } 6183 state_texfactor_arbfp(STATE_RENDER(WINED3DRS_TEXTUREFACTOR), stateblock, context); 6184 state_arb_specularenable(STATE_RENDER(WINED3DRS_SPECULARENABLE), stateblock, context); 6185 } else if(use_pshader && !isStateDirty(context, device->StateTable[STATE_VSHADER].representative)) { 6186 device->shader_backend->shader_select(context, use_pshader, use_vshader); 6187 } 6188 return; 6189 } 6190 6191 if(!use_pshader) { 6192 /* Find or create a shader implementing the fixed function pipeline settings, then activate it */ 6193 gen_ffp_frag_op(stateblock, &settings, FALSE); 6194 desc = (const struct arbfp_ffp_desc *)find_ffp_frag_shader(&priv->fragment_shaders, &settings); 6195 if(!desc) { 6196 struct arbfp_ffp_desc *new_desc = HeapAlloc(GetProcessHeap(), 0, 
sizeof(*new_desc)); 6197 if (!new_desc) 6198 { 6199 ERR("Out of memory\n"); 6200 return; 6201 } 6202 new_desc->num_textures_used = 0; 6203 for (i = 0; i < gl_info->limits.texture_stages; ++i) 6204 { 6205 if(settings.op[i].cop == WINED3DTOP_DISABLE) break; 6206 new_desc->num_textures_used = i; 6207 } 6208 6209 memcpy(&new_desc->parent.settings, &settings, sizeof(settings)); 6210 new_desc->shader = gen_arbfp_ffp_shader(&settings, stateblock); 6211 add_ffp_frag_shader(&priv->fragment_shaders, &new_desc->parent); 6212 TRACE("Allocated fixed function replacement shader descriptor %p\n", new_desc); 6213 desc = new_desc; 6214 } 6215 6216 /* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active(however, note the 6217 * comment above the shader_select call below). If e.g. GLSL is active, the shader_select call will 6218 * deactivate it. 6219 */ 6220 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)); 6221 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)"); 6222 priv->current_fprogram_id = desc->shader; 6223 6224 if(device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) { 6225 /* Reload fixed function constants since they collide with the pixel shader constants */ 6226 for(i = 0; i < MAX_TEXTURES; i++) { 6227 set_bumpmat_arbfp(STATE_TEXTURESTAGE(i, WINED3DTSS_BUMPENVMAT00), stateblock, context); 6228 } 6229 state_texfactor_arbfp(STATE_RENDER(WINED3DRS_TEXTUREFACTOR), stateblock, context); 6230 state_arb_specularenable(STATE_RENDER(WINED3DRS_SPECULARENABLE), stateblock, context); 6231 } 6232 context->last_was_pshader = FALSE; 6233 } else { 6234 context->last_was_pshader = TRUE; 6235 } 6236 6237 /* Finally, select the shader. If a pixel shader is used, it will be set and enabled by the shader backend. 6238 * If this shader backend is arbfp(most likely), then it will simply overwrite the last fixed function replace- 6239 * ment shader. 
If the shader backend is not ARB, it currently is important that the opengl implementation 6240 * type overwrites GL_ARB_fragment_program. This is currently the case with GLSL. If we really want to use 6241 * atifs or nvrc pixel shaders with arb fragment programs we'd have to disable GL_FRAGMENT_PROGRAM_ARB here 6242 * 6243 * Don't call shader_select if the vertex shader is dirty, because it will be called later on by the vertex 6244 * shader handler 6245 */ 6246 if(!isStateDirty(context, device->StateTable[STATE_VSHADER].representative)) { 6247 device->shader_backend->shader_select(context, use_pshader, use_vshader); 6248 6249 if (!isStateDirty(context, STATE_VERTEXSHADERCONSTANT) && (use_vshader || use_pshader)) 6250 stateblock_apply_state(STATE_VERTEXSHADERCONSTANT, stateblock, context); 6251 } 6252 if (use_pshader) stateblock_apply_state(STATE_PIXELSHADERCONSTANT, stateblock, context); 6253 } 6254 6255 /* We can't link the fog states to the fragment state directly since the vertex pipeline links them 6256 * to FOGENABLE. A different linking in different pipeline parts can't be expressed in the combined 6257 * state table, so we need to handle that with a forwarding function. 
The other invisible side effect 6258 * is that changing the fog start and fog end(which links to FOGENABLE in vertex) results in the 6259 * fragment_prog_arbfp function being called because FOGENABLE is dirty, which calls this function here 6260 */ 6261 static void state_arbfp_fog(DWORD state_id, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context) 6262 { 6263 const struct wined3d_state *state = &stateblock->state; 6264 enum fogsource new_source; 6265 6266 TRACE("state_id %#x, stateblock %p, context %p\n", state_id, stateblock, context); 6267 6268 if(!isStateDirty(context, STATE_PIXELSHADER)) { 6269 fragment_prog_arbfp(state_id, stateblock, context); 6270 } 6271 6272 if (!state->render_states[WINED3DRS_FOGENABLE]) return; 6273 6274 if (state->render_states[WINED3DRS_FOGTABLEMODE] == WINED3DFOG_NONE) 6275 { 6276 if (use_vs(state)) 6277 { 6278 new_source = FOGSOURCE_VS; 6279 } 6280 else 6281 { 6282 if (state->render_states[WINED3DRS_FOGVERTEXMODE] == WINED3DFOG_NONE || context->last_was_rhw) 6283 new_source = FOGSOURCE_COORD; 6284 else 6285 new_source = FOGSOURCE_FFP; 6286 } 6287 } else { 6288 new_source = FOGSOURCE_FFP; 6289 } 6290 if(new_source != context->fog_source) { 6291 context->fog_source = new_source; 6292 state_fogstartend(STATE_RENDER(WINED3DRS_FOGSTART), stateblock, context); 6293 } 6294 } 6295 6296 static void textransform(DWORD state, IWineD3DStateBlockImpl *stateblock, struct wined3d_context *context) 6297 { 6298 if(!isStateDirty(context, STATE_PIXELSHADER)) { 6299 fragment_prog_arbfp(state, stateblock, context); 6300 } 6301 } 6302 6303 static const struct StateEntryTemplate arbfp_fragmentstate_template[] = { 6304 {STATE_RENDER(WINED3DRS_TEXTUREFACTOR), { STATE_RENDER(WINED3DRS_TEXTUREFACTOR), state_texfactor_arbfp }, WINED3D_GL_EXT_NONE }, 6305 {STATE_TEXTURESTAGE(0, WINED3DTSS_COLOROP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6306 {STATE_TEXTURESTAGE(0, WINED3DTSS_COLORARG1), { STATE_PIXELSHADER, NULL }, 
WINED3D_GL_EXT_NONE }, 6307 {STATE_TEXTURESTAGE(0, WINED3DTSS_COLORARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6308 {STATE_TEXTURESTAGE(0, WINED3DTSS_COLORARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6309 {STATE_TEXTURESTAGE(0, WINED3DTSS_ALPHAOP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6310 {STATE_TEXTURESTAGE(0, WINED3DTSS_ALPHAARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6311 {STATE_TEXTURESTAGE(0, WINED3DTSS_ALPHAARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6312 {STATE_TEXTURESTAGE(0, WINED3DTSS_ALPHAARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6313 {STATE_TEXTURESTAGE(0, WINED3DTSS_RESULTARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6314 {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6315 {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6316 {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6317 {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6318 {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6319 {STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(0, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6320 {STATE_TEXTURESTAGE(1, WINED3DTSS_COLOROP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6321 {STATE_TEXTURESTAGE(1, WINED3DTSS_COLORARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6322 {STATE_TEXTURESTAGE(1, WINED3DTSS_COLORARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6323 {STATE_TEXTURESTAGE(1, WINED3DTSS_COLORARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 
6324 {STATE_TEXTURESTAGE(1, WINED3DTSS_ALPHAOP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6325 {STATE_TEXTURESTAGE(1, WINED3DTSS_ALPHAARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6326 {STATE_TEXTURESTAGE(1, WINED3DTSS_ALPHAARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6327 {STATE_TEXTURESTAGE(1, WINED3DTSS_ALPHAARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6328 {STATE_TEXTURESTAGE(1, WINED3DTSS_RESULTARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6329 {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6330 {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6331 {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6332 {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6333 {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6334 {STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(1, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6335 {STATE_TEXTURESTAGE(2, WINED3DTSS_COLOROP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6336 {STATE_TEXTURESTAGE(2, WINED3DTSS_COLORARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6337 {STATE_TEXTURESTAGE(2, WINED3DTSS_COLORARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6338 {STATE_TEXTURESTAGE(2, WINED3DTSS_COLORARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6339 {STATE_TEXTURESTAGE(2, WINED3DTSS_ALPHAOP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6340 {STATE_TEXTURESTAGE(2, WINED3DTSS_ALPHAARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6341 {STATE_TEXTURESTAGE(2, 
WINED3DTSS_ALPHAARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6342 {STATE_TEXTURESTAGE(2, WINED3DTSS_ALPHAARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6343 {STATE_TEXTURESTAGE(2, WINED3DTSS_RESULTARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6344 {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6345 {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6346 {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6347 {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6348 {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6349 {STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(2, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6350 {STATE_TEXTURESTAGE(3, WINED3DTSS_COLOROP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6351 {STATE_TEXTURESTAGE(3, WINED3DTSS_COLORARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6352 {STATE_TEXTURESTAGE(3, WINED3DTSS_COLORARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6353 {STATE_TEXTURESTAGE(3, WINED3DTSS_COLORARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6354 {STATE_TEXTURESTAGE(3, WINED3DTSS_ALPHAOP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6355 {STATE_TEXTURESTAGE(3, WINED3DTSS_ALPHAARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6356 {STATE_TEXTURESTAGE(3, WINED3DTSS_ALPHAARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6357 {STATE_TEXTURESTAGE(3, WINED3DTSS_ALPHAARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6358 {STATE_TEXTURESTAGE(3, WINED3DTSS_RESULTARG), { 
STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6359 {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6360 {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6361 {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6362 {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6363 {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6364 {STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(3, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6365 {STATE_TEXTURESTAGE(4, WINED3DTSS_COLOROP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6366 {STATE_TEXTURESTAGE(4, WINED3DTSS_COLORARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6367 {STATE_TEXTURESTAGE(4, WINED3DTSS_COLORARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6368 {STATE_TEXTURESTAGE(4, WINED3DTSS_COLORARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6369 {STATE_TEXTURESTAGE(4, WINED3DTSS_ALPHAOP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6370 {STATE_TEXTURESTAGE(4, WINED3DTSS_ALPHAARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6371 {STATE_TEXTURESTAGE(4, WINED3DTSS_ALPHAARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6372 {STATE_TEXTURESTAGE(4, WINED3DTSS_ALPHAARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6373 {STATE_TEXTURESTAGE(4, WINED3DTSS_RESULTARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6374 {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6375 {STATE_TEXTURESTAGE(4, 
WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6376 {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6377 {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6378 {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6379 {STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(4, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6380 {STATE_TEXTURESTAGE(5, WINED3DTSS_COLOROP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6381 {STATE_TEXTURESTAGE(5, WINED3DTSS_COLORARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6382 {STATE_TEXTURESTAGE(5, WINED3DTSS_COLORARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6383 {STATE_TEXTURESTAGE(5, WINED3DTSS_COLORARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6384 {STATE_TEXTURESTAGE(5, WINED3DTSS_ALPHAOP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6385 {STATE_TEXTURESTAGE(5, WINED3DTSS_ALPHAARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6386 {STATE_TEXTURESTAGE(5, WINED3DTSS_ALPHAARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6387 {STATE_TEXTURESTAGE(5, WINED3DTSS_ALPHAARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6388 {STATE_TEXTURESTAGE(5, WINED3DTSS_RESULTARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6389 {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6390 {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6391 {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), NULL }, 
WINED3D_GL_EXT_NONE }, 6392 {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6393 {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6394 {STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(5, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6395 {STATE_TEXTURESTAGE(6, WINED3DTSS_COLOROP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6396 {STATE_TEXTURESTAGE(6, WINED3DTSS_COLORARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6397 {STATE_TEXTURESTAGE(6, WINED3DTSS_COLORARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6398 {STATE_TEXTURESTAGE(6, WINED3DTSS_COLORARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6399 {STATE_TEXTURESTAGE(6, WINED3DTSS_ALPHAOP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6400 {STATE_TEXTURESTAGE(6, WINED3DTSS_ALPHAARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6401 {STATE_TEXTURESTAGE(6, WINED3DTSS_ALPHAARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6402 {STATE_TEXTURESTAGE(6, WINED3DTSS_ALPHAARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6403 {STATE_TEXTURESTAGE(6, WINED3DTSS_RESULTARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6404 {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6405 {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6406 {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6407 {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6408 {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLSCALE), { 
STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6409 {STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(6, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE }, 6410 {STATE_TEXTURESTAGE(7, WINED3DTSS_COLOROP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6411 {STATE_TEXTURESTAGE(7, WINED3DTSS_COLORARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6412 {STATE_TEXTURESTAGE(7, WINED3DTSS_COLORARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6413 {STATE_TEXTURESTAGE(7, WINED3DTSS_COLORARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6414 {STATE_TEXTURESTAGE(7, WINED3DTSS_ALPHAOP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6415 {STATE_TEXTURESTAGE(7, WINED3DTSS_ALPHAARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6416 {STATE_TEXTURESTAGE(7, WINED3DTSS_ALPHAARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6417 {STATE_TEXTURESTAGE(7, WINED3DTSS_ALPHAARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6418 {STATE_TEXTURESTAGE(7, WINED3DTSS_RESULTARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6419 {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 6420 {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT01), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6421 {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT10), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6422 {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT11), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVMAT00), NULL }, WINED3D_GL_EXT_NONE }, 6423 {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLSCALE), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 6424 {STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLOFFSET), { STATE_TEXTURESTAGE(7, WINED3DTSS_BUMPENVLSCALE), NULL }, WINED3D_GL_EXT_NONE 
}, 6425 {STATE_SAMPLER(0), { STATE_SAMPLER(0), sampler_texdim }, WINED3D_GL_EXT_NONE }, 6426 {STATE_SAMPLER(1), { STATE_SAMPLER(1), sampler_texdim }, WINED3D_GL_EXT_NONE }, 6427 {STATE_SAMPLER(2), { STATE_SAMPLER(2), sampler_texdim }, WINED3D_GL_EXT_NONE }, 6428 {STATE_SAMPLER(3), { STATE_SAMPLER(3), sampler_texdim }, WINED3D_GL_EXT_NONE }, 6429 {STATE_SAMPLER(4), { STATE_SAMPLER(4), sampler_texdim }, WINED3D_GL_EXT_NONE }, 6430 {STATE_SAMPLER(5), { STATE_SAMPLER(5), sampler_texdim }, WINED3D_GL_EXT_NONE }, 6431 {STATE_SAMPLER(6), { STATE_SAMPLER(6), sampler_texdim }, WINED3D_GL_EXT_NONE }, 6432 {STATE_SAMPLER(7), { STATE_SAMPLER(7), sampler_texdim }, WINED3D_GL_EXT_NONE }, 6433 {STATE_PIXELSHADER, { STATE_PIXELSHADER, fragment_prog_arbfp }, WINED3D_GL_EXT_NONE }, 6434 {STATE_RENDER(WINED3DRS_FOGENABLE), { STATE_RENDER(WINED3DRS_FOGENABLE), state_arbfp_fog }, WINED3D_GL_EXT_NONE }, 6435 {STATE_RENDER(WINED3DRS_FOGTABLEMODE), { STATE_RENDER(WINED3DRS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6436 {STATE_RENDER(WINED3DRS_FOGVERTEXMODE), { STATE_RENDER(WINED3DRS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 6437 {STATE_RENDER(WINED3DRS_FOGSTART), { STATE_RENDER(WINED3DRS_FOGSTART), state_fogstartend }, WINED3D_GL_EXT_NONE }, 6438 {STATE_RENDER(WINED3DRS_FOGEND), { STATE_RENDER(WINED3DRS_FOGSTART), NULL }, WINED3D_GL_EXT_NONE }, 6439 {STATE_RENDER(WINED3DRS_SRGBWRITEENABLE), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 6440 {STATE_RENDER(WINED3DRS_FOGCOLOR), { STATE_RENDER(WINED3DRS_FOGCOLOR), state_fogcolor }, WINED3D_GL_EXT_NONE }, 6441 {STATE_RENDER(WINED3DRS_FOGDENSITY), { STATE_RENDER(WINED3DRS_FOGDENSITY), state_fogdensity }, WINED3D_GL_EXT_NONE }, 6442 {STATE_TEXTURESTAGE(0,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(0, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE }, 6443 {STATE_TEXTURESTAGE(1,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(1, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE }, 
6444 {STATE_TEXTURESTAGE(2,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(2, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE }, 6445 {STATE_TEXTURESTAGE(3,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(3, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE }, 6446 {STATE_TEXTURESTAGE(4,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(4, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE }, 6447 {STATE_TEXTURESTAGE(5,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(5, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE }, 6448 {STATE_TEXTURESTAGE(6,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(6, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE }, 6449 {STATE_TEXTURESTAGE(7,WINED3DTSS_TEXTURETRANSFORMFLAGS),{STATE_TEXTURESTAGE(7, WINED3DTSS_TEXTURETRANSFORMFLAGS), textransform }, WINED3D_GL_EXT_NONE }, 6450 {STATE_RENDER(WINED3DRS_SPECULARENABLE), { STATE_RENDER(WINED3DRS_SPECULARENABLE), state_arb_specularenable}, WINED3D_GL_EXT_NONE }, 6451 {0 /* Terminate */, { 0, 0 }, WINED3D_GL_EXT_NONE }, 6452 }; 6453 6454 const struct fragment_pipeline arbfp_fragment_pipeline = { 6455 arbfp_enable, 6456 arbfp_get_caps, 6457 arbfp_alloc, 6458 arbfp_free, 6459 shader_arb_color_fixup_supported, 6460 arbfp_fragmentstate_template, 6461 TRUE /* We can disable projected textures */ 6462 }; 6463 6464 struct arbfp_blit_priv { 6465 GLenum yuy2_rect_shader, yuy2_2d_shader; 6466 GLenum uyvy_rect_shader, uyvy_2d_shader; 6467 GLenum yv12_rect_shader, yv12_2d_shader; 6468 GLenum p8_rect_shader, p8_2d_shader; 6469 GLuint palette_texture; 6470 }; 6471 6472 static HRESULT arbfp_blit_alloc(IWineD3DDevice *iface) { 6473 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface; 6474 device->blit_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct arbfp_blit_priv)); 6475 if(!device->blit_priv) { 6476 ERR("Out of memory\n"); 6477 return 
E_OUTOFMEMORY; 6478 } 6479 return WINED3D_OK; 6480 } 6481 6482 /* Context activation is done by the caller. */ 6483 static void arbfp_blit_free(IWineD3DDevice *iface) { 6484 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface; 6485 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 6486 struct arbfp_blit_priv *priv = device->blit_priv; 6487 6488 ENTER_GL(); 6489 GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_rect_shader)); 6490 GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_2d_shader)); 6491 GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_rect_shader)); 6492 GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_2d_shader)); 6493 GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_rect_shader)); 6494 GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_2d_shader)); 6495 GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_rect_shader)); 6496 GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_2d_shader)); 6497 checkGLcall("Delete yuv and p8 programs"); 6498 6499 if(priv->palette_texture) glDeleteTextures(1, &priv->palette_texture); 6500 LEAVE_GL(); 6501 6502 HeapFree(GetProcessHeap(), 0, device->blit_priv); 6503 device->blit_priv = NULL; 6504 } 6505 6506 static BOOL gen_planar_yuv_read(struct wined3d_shader_buffer *buffer, enum complex_fixup fixup, 6507 GLenum textype, char *luminance) 6508 { 6509 char chroma; 6510 const char *tex, *texinstr; 6511 6512 if (fixup == COMPLEX_FIXUP_UYVY) { 6513 chroma = 'x'; 6514 *luminance = 'w'; 6515 } else { 6516 chroma = 'w'; 6517 *luminance = 'x'; 6518 } 6519 switch(textype) { 6520 case GL_TEXTURE_2D: tex = "2D"; texinstr = "TXP"; break; 6521 case GL_TEXTURE_RECTANGLE_ARB: tex = "RECT"; texinstr = "TEX"; break; 6522 default: 6523 /* This is more tricky than just replacing the texture type - we have to navigate 6524 * properly in the texture to find the correct chroma values 6525 */ 6526 FIXME("Implement yuv correction for non-2d, non-rect textures\n"); 6527 return FALSE; 6528 } 6529 6530 /* First we have to read the chroma values. 
This means we need at least two pixels(no filtering), 6531 * or 4 pixels(with filtering). To get the unmodified chromas, we have to rid ourselves of the 6532 * filtering when we sample the texture. 6533 * 6534 * These are the rules for reading the chroma: 6535 * 6536 * Even pixel: Cr 6537 * Even pixel: U 6538 * Odd pixel: V 6539 * 6540 * So we have to get the sampling x position in non-normalized coordinates in integers 6541 */ 6542 if(textype != GL_TEXTURE_RECTANGLE_ARB) { 6543 shader_addline(buffer, "MUL texcrd.xy, fragment.texcoord[0], size.x;\n"); 6544 shader_addline(buffer, "MOV texcrd.w, size.x;\n"); 6545 } else { 6546 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 6547 } 6548 /* We must not allow filtering between pixel x and x+1, this would mix U and V 6549 * Vertical filtering is ok. However, bear in mind that the pixel center is at 6550 * 0.5, so add 0.5. 6551 */ 6552 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 6553 shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n"); 6554 6555 /* Divide the x coordinate by 0.5 and get the fraction. This gives 0.25 and 0.75 for the 6556 * even and odd pixels respectively 6557 */ 6558 shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n"); 6559 shader_addline(buffer, "FRC texcrd2, texcrd2;\n"); 6560 6561 /* Sample Pixel 1 */ 6562 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 6563 6564 /* Put the value into either of the chroma values */ 6565 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 6566 shader_addline(buffer, "MUL chroma.x, luminance.%c, temp.x;\n", chroma); 6567 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 6568 shader_addline(buffer, "MUL chroma.y, luminance.%c, temp.x;\n", chroma); 6569 6570 /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample 6571 * the pixel right to the current one. Otherwise, sample the left pixel. 6572 * Bias and scale the SLT result to -1;1 and add it to the texcrd.x. 
6573 */ 6574 shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n"); 6575 shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n"); 6576 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 6577 6578 /* Put the value into the other chroma */ 6579 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 6580 shader_addline(buffer, "MAD chroma.y, luminance.%c, temp.x, chroma.y;\n", chroma); 6581 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 6582 shader_addline(buffer, "MAD chroma.x, luminance.%c, temp.x, chroma.x;\n", chroma); 6583 6584 /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of 6585 * the current one and lerp the two U and V values 6586 */ 6587 6588 /* This gives the correctly filtered luminance value */ 6589 shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex); 6590 6591 return TRUE; 6592 } 6593 6594 static BOOL gen_yv12_read(struct wined3d_shader_buffer *buffer, GLenum textype, char *luminance) 6595 { 6596 const char *tex; 6597 6598 switch(textype) { 6599 case GL_TEXTURE_2D: tex = "2D"; break; 6600 case GL_TEXTURE_RECTANGLE_ARB: tex = "RECT"; break; 6601 default: 6602 FIXME("Implement yv12 correction for non-2d, non-rect textures\n"); 6603 return FALSE; 6604 } 6605 6606 /* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2) 6607 * V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective 6608 * bitdepth is 12 bits per pixel. Since the U and V planes have only half the 6609 * pitch of the luminance plane, the packing into the gl texture is a bit 6610 * unfortunate. 
If the whole texture is interpreted as luminance data it looks 6611 * approximately like this: 6612 * 6613 * +----------------------------------+---- 6614 * | | 6615 * | | 6616 * | | 6617 * | | 6618 * | | 2 6619 * | LUMINANCE | - 6620 * | | 3 6621 * | | 6622 * | | 6623 * | | 6624 * | | 6625 * +----------------+-----------------+---- 6626 * | | | 6627 * | U even rows | U odd rows | 6628 * | | | 1 6629 * +----------------+------------------ - 6630 * | | | 3 6631 * | V even rows | V odd rows | 6632 * | | | 6633 * +----------------+-----------------+---- 6634 * | | | 6635 * | 0.5 | 0.5 | 6636 * 6637 * So it appears as if there are 4 chroma images, but in fact the odd rows 6638 * in the chroma images are in the same row as the even ones. So its is 6639 * kinda tricky to read 6640 * 6641 * When reading from rectangle textures, keep in mind that the input y coordinates 6642 * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height 6643 */ 6644 shader_addline(buffer, "PARAM yv12_coef = {%f, %f, %f, %f};\n", 6645 2.0f / 3.0f, 1.0f / 6.0f, (2.0f / 3.0f) + (1.0f / 6.0f), 1.0f / 3.0f); 6646 6647 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 6648 /* the chroma planes have only half the width */ 6649 shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n"); 6650 6651 /* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias 6652 * the coordinate. 
Also read the right side of the image when reading odd lines 6653 * 6654 * Don't forget to clamp the y values in into the range, otherwise we'll get filtering 6655 * bleeding 6656 */ 6657 if(textype == GL_TEXTURE_2D) { 6658 6659 shader_addline(buffer, "RCP chroma.w, size.y;\n"); 6660 6661 shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n"); 6662 6663 shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n"); 6664 shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n"); 6665 6666 /* Read odd lines from the right side(add size * 0.5 to the x coordinate */ 6667 shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 6668 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 6669 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 6670 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 6671 6672 /* clamp, keep the half pixel origin in mind */ 6673 shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, yv12_coef.x;\n"); 6674 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 6675 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.z;\n"); 6676 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 6677 } else { 6678 /* Read from [size - size+size/4] */ 6679 shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n"); 6680 shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n"); 6681 6682 /* Read odd lines from the right side(add size * 0.5 to the x coordinate */ 6683 shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 6684 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 6685 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 6686 shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n"); 6687 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 6688 6689 /* Make sure to read exactly from the pixel center */ 6690 shader_addline(buffer, 
"FLR texcrd.y, texcrd.y;\n"); 6691 shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n"); 6692 6693 /* Clamp */ 6694 shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n"); 6695 shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n"); 6696 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 6697 shader_addline(buffer, "ADD temp.y, size.y, -coef.y;\n"); 6698 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 6699 } 6700 /* Read the texture, put the result into the output register */ 6701 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 6702 shader_addline(buffer, "MOV chroma.x, temp.w;\n"); 6703 6704 /* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th 6705 * No need to clamp because we're just reusing the already clamped value from above 6706 */ 6707 if(textype == GL_TEXTURE_2D) { 6708 shader_addline(buffer, "ADD texcrd.y, texcrd.y, yv12_coef.y;\n"); 6709 } else { 6710 shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, texcrd.y;\n"); 6711 } 6712 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 6713 shader_addline(buffer, "MOV chroma.y, temp.w;\n"); 6714 6715 /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate. 6716 * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance 6717 * values due to filtering 6718 */ 6719 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 6720 if(textype == GL_TEXTURE_2D) { 6721 /* Multiply the y coordinate by 2/3 and clamp it */ 6722 shader_addline(buffer, "MUL texcrd.y, texcrd.y, yv12_coef.x;\n"); 6723 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.x;\n"); 6724 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 6725 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 6726 } else { 6727 /* Reading from texture_rectangles is pretty straightforward, just use the unmodified 6728 * texture coordinate. 
It is still a good idea to clamp it though, since the opengl texture 6729 * is bigger 6730 */ 6731 shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n"); 6732 shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n"); 6733 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 6734 } 6735 *luminance = 'a'; 6736 6737 return TRUE; 6738 } 6739 6740 static GLuint gen_p8_shader(IWineD3DDeviceImpl *device, GLenum textype) 6741 { 6742 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 6743 GLenum shader; 6744 struct wined3d_shader_buffer buffer; 6745 struct arbfp_blit_priv *priv = device->blit_priv; 6746 GLint pos; 6747 6748 /* Shader header */ 6749 if (!shader_buffer_init(&buffer)) 6750 { 6751 ERR("Failed to initialize shader buffer.\n"); 6752 return 0; 6753 } 6754 6755 ENTER_GL(); 6756 GL_EXTCALL(glGenProgramsARB(1, &shader)); 6757 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 6758 LEAVE_GL(); 6759 if(!shader) { 6760 shader_buffer_free(&buffer); 6761 return 0; 6762 } 6763 6764 shader_addline(&buffer, "!!ARBfp1.0\n"); 6765 shader_addline(&buffer, "TEMP index;\n"); 6766 6767 /* { 255/256, 0.5/255*255/256, 0, 0 } */ 6768 shader_addline(&buffer, "PARAM constants = { 0.996, 0.00195, 0, 0 };\n"); 6769 6770 /* The alpha-component contains the palette index */ 6771 if(textype == GL_TEXTURE_RECTANGLE_ARB) 6772 shader_addline(&buffer, "TXP index, fragment.texcoord[0], texture[0], RECT;\n"); 6773 else 6774 shader_addline(&buffer, "TEX index, fragment.texcoord[0], texture[0], 2D;\n"); 6775 6776 /* Scale the index by 255/256 and add a bias of '0.5' in order to sample in the middle */ 6777 shader_addline(&buffer, "MAD index.a, index.a, constants.x, constants.y;\n"); 6778 6779 /* Use the alpha-component as an index in the palette to get the final color */ 6780 shader_addline(&buffer, "TEX result.color, index.a, texture[1], 1D;\n"); 6781 shader_addline(&buffer, "END\n"); 6782 6783 ENTER_GL(); 6784 
GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 6785 strlen(buffer.buffer), buffer.buffer)); 6786 checkGLcall("glProgramStringARB()"); 6787 6788 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 6789 if (pos != -1) 6790 { 6791 FIXME("Fragment program error at position %d: %s\n\n", pos, 6792 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 6793 shader_arb_dump_program_source(buffer.buffer); 6794 } 6795 6796 if (textype == GL_TEXTURE_RECTANGLE_ARB) 6797 priv->p8_rect_shader = shader; 6798 else 6799 priv->p8_2d_shader = shader; 6800 6801 shader_buffer_free(&buffer); 6802 LEAVE_GL(); 6803 6804 return shader; 6805 } 6806 6807 /* Context activation is done by the caller. */ 6808 static void upload_palette(IWineD3DSurfaceImpl *surface) 6809 { 6810 BYTE table[256][4]; 6811 IWineD3DDeviceImpl *device = surface->resource.device; 6812 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 6813 struct arbfp_blit_priv *priv = device->blit_priv; 6814 BOOL colorkey = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE; 6815 6816 d3dfmt_p8_init_palette(surface, table, colorkey); 6817 6818 ENTER_GL(); 6819 if (!priv->palette_texture) 6820 glGenTextures(1, &priv->palette_texture); 6821 6822 GL_EXTCALL(glActiveTextureARB(GL_TEXTURE1)); 6823 glBindTexture(GL_TEXTURE_1D, priv->palette_texture); 6824 6825 glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); 6826 6827 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 6828 /* Make sure we have discrete color levels. 
*/ 6829 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 6830 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 6831 /* Upload the palette */ 6832 /* TODO: avoid unneeed uploads in the future by adding some SFLAG_PALETTE_DIRTY mechanism */ 6833 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, 256, 0, GL_RGBA, GL_UNSIGNED_BYTE, table); 6834 6835 /* Switch back to unit 0 in which the 2D texture will be stored. */ 6836 GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0)); 6837 LEAVE_GL(); 6838 } 6839 6840 /* Context activation is done by the caller. */ 6841 static GLuint gen_yuv_shader(IWineD3DDeviceImpl *device, enum complex_fixup yuv_fixup, GLenum textype) 6842 { 6843 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 6844 GLenum shader; 6845 struct wined3d_shader_buffer buffer; 6846 char luminance_component; 6847 struct arbfp_blit_priv *priv = device->blit_priv; 6848 GLint pos; 6849 6850 /* Shader header */ 6851 if (!shader_buffer_init(&buffer)) 6852 { 6853 ERR("Failed to initialize shader buffer.\n"); 6854 return 0; 6855 } 6856 6857 ENTER_GL(); 6858 GL_EXTCALL(glGenProgramsARB(1, &shader)); 6859 checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))"); 6860 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 6861 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)"); 6862 LEAVE_GL(); 6863 if(!shader) { 6864 shader_buffer_free(&buffer); 6865 return 0; 6866 } 6867 6868 /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel, 6869 * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and 6870 * two chroma(U and V) values. Each macropixel has two luminance values, one for 6871 * each single pixel it contains, and one U and one V value shared between both 6872 * pixels. 6873 * 6874 * The data is loaded into an A8L8 texture. With YUY2, the luminance component 6875 * contains the luminance and alpha the chroma. With UYVY it is vice versa. 
Thus 6876 * take the format into account when generating the read swizzles 6877 * 6878 * Reading the Y value is straightforward - just sample the texture. The hardware 6879 * takes care of filtering in the horizontal and vertical direction. 6880 * 6881 * Reading the U and V values is harder. We have to avoid filtering horizontally, 6882 * because that would mix the U and V values of one pixel or two adjacent pixels. 6883 * Thus floor the texture coordinate and add 0.5 to get an unfiltered read, 6884 * regardless of the filtering setting. Vertical filtering works automatically 6885 * though - the U and V values of two rows are mixed nicely. 6886 * 6887 * Appart of avoiding filtering issues, the code has to know which value it just 6888 * read, and where it can find the other one. To determine this, it checks if 6889 * it sampled an even or odd pixel, and shifts the 2nd read accordingly. 6890 * 6891 * Handling horizontal filtering of U and V values requires reading a 2nd pair 6892 * of pixels, extracting U and V and mixing them. This is not implemented yet. 6893 * 6894 * An alternative implementation idea is to load the texture as A8R8G8B8 texture, 6895 * with width / 2. This way one read gives all 3 values, finding U and V is easy 6896 * in an unfiltered situation. Finding the luminance on the other hand requires 6897 * finding out if it is an odd or even pixel. The real drawback of this approach 6898 * is filtering. This would have to be emulated completely in the shader, reading 6899 * up two 2 packed pixels in up to 2 rows and interpolating both horizontally and 6900 * vertically. 
Beyond that it would require adjustments to the texture handling 6901 * code to deal with the width scaling 6902 */ 6903 shader_addline(&buffer, "!!ARBfp1.0\n"); 6904 shader_addline(&buffer, "TEMP luminance;\n"); 6905 shader_addline(&buffer, "TEMP temp;\n"); 6906 shader_addline(&buffer, "TEMP chroma;\n"); 6907 shader_addline(&buffer, "TEMP texcrd;\n"); 6908 shader_addline(&buffer, "TEMP texcrd2;\n"); 6909 shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.25};\n"); 6910 shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n"); 6911 shader_addline(&buffer, "PARAM size = program.local[0];\n"); 6912 6913 switch (yuv_fixup) 6914 { 6915 case COMPLEX_FIXUP_UYVY: 6916 case COMPLEX_FIXUP_YUY2: 6917 if (!gen_planar_yuv_read(&buffer, yuv_fixup, textype, &luminance_component)) 6918 { 6919 shader_buffer_free(&buffer); 6920 return 0; 6921 } 6922 break; 6923 6924 case COMPLEX_FIXUP_YV12: 6925 if (!gen_yv12_read(&buffer, textype, &luminance_component)) 6926 { 6927 shader_buffer_free(&buffer); 6928 return 0; 6929 } 6930 break; 6931 6932 default: 6933 FIXME("Unsupported YUV fixup %#x\n", yuv_fixup); 6934 shader_buffer_free(&buffer); 6935 return 0; 6936 } 6937 6938 /* Calculate the final result. Formula is taken from 6939 * http://www.fourcc.org/fccyvrgb.php. 
Note that the chroma 6940 * ranges from -0.5 to 0.5 6941 */ 6942 shader_addline(&buffer, "SUB chroma.xy, chroma, coef.y;\n"); 6943 6944 shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component); 6945 shader_addline(&buffer, "MAD temp.x, -chroma.y, yuv_coef.y, luminance.%c;\n", luminance_component); 6946 shader_addline(&buffer, "MAD result.color.y, -chroma.x, yuv_coef.z, temp.x;\n"); 6947 shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component); 6948 shader_addline(&buffer, "END\n"); 6949 6950 ENTER_GL(); 6951 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 6952 strlen(buffer.buffer), buffer.buffer)); 6953 checkGLcall("glProgramStringARB()"); 6954 6955 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 6956 if (pos != -1) 6957 { 6958 FIXME("Fragment program error at position %d: %s\n\n", pos, 6959 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 6960 shader_arb_dump_program_source(buffer.buffer); 6961 } 6962 else 6963 { 6964 GLint native; 6965 6966 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 6967 checkGLcall("glGetProgramivARB()"); 6968 if (!native) WARN("Program exceeds native resource limits.\n"); 6969 } 6970 6971 shader_buffer_free(&buffer); 6972 LEAVE_GL(); 6973 6974 switch (yuv_fixup) 6975 { 6976 case COMPLEX_FIXUP_YUY2: 6977 if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yuy2_rect_shader = shader; 6978 else priv->yuy2_2d_shader = shader; 6979 break; 6980 6981 case COMPLEX_FIXUP_UYVY: 6982 if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->uyvy_rect_shader = shader; 6983 else priv->uyvy_2d_shader = shader; 6984 break; 6985 6986 case COMPLEX_FIXUP_YV12: 6987 if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yv12_rect_shader = shader; 6988 else priv->yv12_2d_shader = shader; 6989 break; 6990 default: 6991 ERR("Unsupported complex fixup: %d\n", yuv_fixup); 6992 } 
6993 6994 return shader; 6995 } 6996 6997 /* Context activation is done by the caller. */ 6998 static HRESULT arbfp_blit_set(IWineD3DDevice *iface, IWineD3DSurfaceImpl *surface) 6999 { 7000 GLenum shader; 7001 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface; 7002 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 7003 float size[4] = {(float) surface->pow2Width, (float) surface->pow2Height, 1.0f, 1.0f}; 7004 struct arbfp_blit_priv *priv = device->blit_priv; 7005 enum complex_fixup fixup; 7006 GLenum textype = surface->texture_target; 7007 7008 if (!is_complex_fixup(surface->resource.format->color_fixup)) 7009 { 7010 TRACE("Fixup:\n"); 7011 dump_color_fixup_desc(surface->resource.format->color_fixup); 7012 /* Don't bother setting up a shader for unconverted formats */ 7013 ENTER_GL(); 7014 glEnable(textype); 7015 checkGLcall("glEnable(textype)"); 7016 LEAVE_GL(); 7017 return WINED3D_OK; 7018 } 7019 7020 fixup = get_complex_fixup(surface->resource.format->color_fixup); 7021 7022 switch(fixup) 7023 { 7024 case COMPLEX_FIXUP_YUY2: 7025 shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yuy2_rect_shader : priv->yuy2_2d_shader; 7026 break; 7027 7028 case COMPLEX_FIXUP_UYVY: 7029 shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->uyvy_rect_shader : priv->uyvy_2d_shader; 7030 break; 7031 7032 case COMPLEX_FIXUP_YV12: 7033 shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yv12_rect_shader : priv->yv12_2d_shader; 7034 break; 7035 7036 case COMPLEX_FIXUP_P8: 7037 shader = textype == GL_TEXTURE_RECTANGLE_ARB ? 
priv->p8_rect_shader : priv->p8_2d_shader; 7038 if (!shader) shader = gen_p8_shader(device, textype); 7039 7040 upload_palette(surface); 7041 break; 7042 7043 default: 7044 FIXME("Unsupported complex fixup %#x, not setting a shader\n", fixup); 7045 ENTER_GL(); 7046 glEnable(textype); 7047 checkGLcall("glEnable(textype)"); 7048 LEAVE_GL(); 7049 return E_NOTIMPL; 7050 } 7051 7052 if (!shader) shader = gen_yuv_shader(device, fixup, textype); 7053 7054 ENTER_GL(); 7055 glEnable(GL_FRAGMENT_PROGRAM_ARB); 7056 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 7057 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 7058 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)"); 7059 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, size)); 7060 checkGLcall("glProgramLocalParameter4fvARB"); 7061 LEAVE_GL(); 7062 7063 return WINED3D_OK; 7064 } 7065 7066 /* Context activation is done by the caller. */ 7067 static void arbfp_blit_unset(IWineD3DDevice *iface) { 7068 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface; 7069 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 7070 7071 ENTER_GL(); 7072 glDisable(GL_FRAGMENT_PROGRAM_ARB); 7073 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 7074 glDisable(GL_TEXTURE_2D); 7075 checkGLcall("glDisable(GL_TEXTURE_2D)"); 7076 if (gl_info->supported[ARB_TEXTURE_CUBE_MAP]) 7077 { 7078 glDisable(GL_TEXTURE_CUBE_MAP_ARB); 7079 checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)"); 7080 } 7081 if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) 7082 { 7083 glDisable(GL_TEXTURE_RECTANGLE_ARB); 7084 checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)"); 7085 } 7086 LEAVE_GL(); 7087 } 7088 7089 static BOOL arbfp_blit_supported(const struct wined3d_gl_info *gl_info, enum blit_operation blit_op, 7090 const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format, 7091 const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format 
*dst_format) 7092 { 7093 enum complex_fixup src_fixup; 7094 7095 if (blit_op != BLIT_OP_BLIT) 7096 { 7097 TRACE("Unsupported blit_op=%d\n", blit_op); 7098 return FALSE; 7099 } 7100 7101 src_fixup = get_complex_fixup(src_format->color_fixup); 7102 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d)) 7103 { 7104 TRACE("Checking support for fixup:\n"); 7105 dump_color_fixup_desc(src_format->color_fixup); 7106 } 7107 7108 if (!is_identity_fixup(dst_format->color_fixup)) 7109 { 7110 TRACE("Destination fixups are not supported\n"); 7111 return FALSE; 7112 } 7113 7114 if (is_identity_fixup(src_format->color_fixup)) 7115 { 7116 TRACE("[OK]\n"); 7117 return TRUE; 7118 } 7119 7120 /* We only support YUV conversions. */ 7121 if (!is_complex_fixup(src_format->color_fixup)) 7122 { 7123 TRACE("[FAILED]\n"); 7124 return FALSE; 7125 } 7126 7127 switch(src_fixup) 7128 { 7129 case COMPLEX_FIXUP_YUY2: 7130 case COMPLEX_FIXUP_UYVY: 7131 case COMPLEX_FIXUP_YV12: 7132 case COMPLEX_FIXUP_P8: 7133 TRACE("[OK]\n"); 7134 return TRUE; 7135 7136 default: 7137 FIXME("Unsupported YUV fixup %#x\n", src_fixup); 7138 TRACE("[FAILED]\n"); 7139 return FALSE; 7140 } 7141 } 7142 7143 HRESULT arbfp_blit_surface(IWineD3DDeviceImpl *device, IWineD3DSurfaceImpl *src_surface, const RECT *src_rect, 7144 IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect_in, enum blit_operation blit_op, 7145 DWORD Filter) 7146 { 7147 IWineD3DSwapChainImpl *dst_swapchain; 7148 struct wined3d_context *context; 7149 RECT dst_rect = *dst_rect_in; 7150 7151 /* Now load the surface */ 7152 surface_internal_preload(src_surface, SRGB_RGB); 7153 7154 /* Activate the destination context, set it up for blitting */ 7155 context = context_acquire(device, dst_surface); 7156 context_apply_blit_state(context, device); 7157 7158 /* The coordinates of the ddraw front buffer are always fullscreen ('screen coordinates', 7159 * while OpenGL coordinates are window relative. 7160 * Also beware of the origin difference(top left vs bottom left). 
7161 * Also beware that the front buffer's surface size is screen width x screen height, 7162 * whereas the real gl drawable size is the size of the window. */ 7163 dst_swapchain = dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN 7164 ? dst_surface->container.u.swapchain : NULL; 7165 if (dst_swapchain && dst_surface == dst_swapchain->front_buffer) 7166 surface_translate_frontbuffer_coords(dst_surface, context->win_handle, &dst_rect); 7167 7168 arbfp_blit_set((IWineD3DDevice *)device, src_surface); 7169 7170 ENTER_GL(); 7171 7172 /* Draw a textured quad */ 7173 draw_textured_quad(src_surface, src_rect, &dst_rect, Filter); 7174 7175 LEAVE_GL(); 7176 7177 /* Leave the opengl state valid for blitting */ 7178 arbfp_blit_unset((IWineD3DDevice *)device); 7179 7180 if (wined3d_settings.strict_draw_ordering || (dst_swapchain 7181 && (dst_surface == dst_swapchain->front_buffer 7182 || dst_swapchain->num_contexts > 1))) 7183 wglFlush(); /* Flush to ensure ordering across contexts. */ 7184 7185 context_release(context); 7186 7187 surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE); 7188 return WINED3D_OK; 7189 } 7190 7191 /* Do not call while under the GL lock. */ 7192 static HRESULT arbfp_blit_color_fill(IWineD3DDeviceImpl *device, IWineD3DSurfaceImpl *dst_surface, 7193 const RECT *dst_rect, const WINED3DCOLORVALUE *color) 7194 { 7195 FIXME("Color filling not implemented by arbfp_blit\n"); 7196 return WINED3DERR_INVALIDCALL; 7197 } 7198 7199 const struct blit_shader arbfp_blit = { 7200 arbfp_blit_alloc, 7201 arbfp_blit_free, 7202 arbfp_blit_set, 7203 arbfp_blit_unset, 7204 arbfp_blit_supported, 7205 arbfp_blit_color_fill 7206 }; 7207