1 /*
2  * Direct3D bytecode output functions
3  *
4  * Copyright 2008 Stefan Dösinger
5  * Copyright 2009 Matteo Bruni
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20  *
21  */
22 
23 #include "wine/debug.h"
24 
25 #include "d3d9types.h"
26 #include "d3dcompiler_private.h"
27 
28 WINE_DEFAULT_DEBUG_CHANNEL(bytecodewriter);
29 
30 /****************************************************************
31  * General assembler shader construction helper routines follow *
32  ****************************************************************/
33 /* struct instruction *alloc_instr
34  *
35  * Allocates a new instruction structure with srcs registers
36  *
37  * Parameters:
38  *  srcs: Number of source registers to allocate
39  *
40  * Returns:
41  *  A pointer to the allocated instruction structure
42  *  NULL in case of an allocation failure
43  */
44 struct instruction *alloc_instr(unsigned int srcs) {
45     struct instruction *ret = d3dcompiler_alloc(sizeof(*ret));
46     if(!ret) {
47         ERR("Failed to allocate memory for an instruction structure\n");
48         return NULL;
49     }
50 
51     if(srcs) {
52         ret->src = d3dcompiler_alloc(srcs * sizeof(*ret->src));
53         if(!ret->src) {
54             ERR("Failed to allocate memory for instruction registers\n");
55             d3dcompiler_free(ret);
56             return NULL;
57         }
58         ret->num_srcs = srcs;
59     }
60     return ret;
61 }
62 
63 /* void add_instruction
64  *
65  * Adds a new instruction to the shader's instructions array and grows the instruction array
66  * if needed.
67  *
68  * The function does NOT copy the instruction structure. Make sure not to release the
69  * instruction or any of its substructures like registers.
70  *
71  * Parameters:
72  *  shader: Shader to add the instruction to
73  *  instr: Instruction to add to the shader
74  */
75 BOOL add_instruction(struct bwriter_shader *shader, struct instruction *instr) {
76     struct instruction      **new_instructions;
77 
78     if(!shader) return FALSE;
79 
80     if(shader->instr_alloc_size == 0) {
81         shader->instr = d3dcompiler_alloc(sizeof(*shader->instr) * INSTRARRAY_INITIAL_SIZE);
82         if(!shader->instr) {
83             ERR("Failed to allocate the shader instruction array\n");
84             return FALSE;
85         }
86         shader->instr_alloc_size = INSTRARRAY_INITIAL_SIZE;
87     } else if(shader->instr_alloc_size == shader->num_instrs) {
88         new_instructions = d3dcompiler_realloc(shader->instr,
89                                        sizeof(*shader->instr) * (shader->instr_alloc_size) * 2);
90         if(!new_instructions) {
91             ERR("Failed to grow the shader instruction array\n");
92             return FALSE;
93         }
94         shader->instr = new_instructions;
95         shader->instr_alloc_size = shader->instr_alloc_size * 2;
96     } else if(shader->num_instrs > shader->instr_alloc_size) {
97         ERR("More instructions than allocated. This should not happen\n");
98         return FALSE;
99     }
100 
101     shader->instr[shader->num_instrs] = instr;
102     shader->num_instrs++;
103     return TRUE;
104 }
105 
106 BOOL add_constF(struct bwriter_shader *shader, DWORD reg, float x, float y, float z, float w) {
107     struct constant *newconst;
108 
109     if(shader->num_cf) {
110         struct constant **newarray;
111         newarray = d3dcompiler_realloc(shader->constF,
112                                sizeof(*shader->constF) * (shader->num_cf + 1));
113         if(!newarray) {
114             ERR("Failed to grow the constants array\n");
115             return FALSE;
116         }
117         shader->constF = newarray;
118     } else {
119         shader->constF = d3dcompiler_alloc(sizeof(*shader->constF));
120         if(!shader->constF) {
121             ERR("Failed to allocate the constants array\n");
122             return FALSE;
123         }
124     }
125 
126     newconst = d3dcompiler_alloc(sizeof(*newconst));
127     if(!newconst) {
128         ERR("Failed to allocate a new constant\n");
129         return FALSE;
130     }
131     newconst->regnum = reg;
132     newconst->value[0].f = x;
133     newconst->value[1].f = y;
134     newconst->value[2].f = z;
135     newconst->value[3].f = w;
136     shader->constF[shader->num_cf] = newconst;
137 
138     shader->num_cf++;
139     return TRUE;
140 }
141 
142 BOOL add_constI(struct bwriter_shader *shader, DWORD reg, INT x, INT y, INT z, INT w) {
143     struct constant *newconst;
144 
145     if(shader->num_ci) {
146         struct constant **newarray;
147         newarray = d3dcompiler_realloc(shader->constI,
148                                sizeof(*shader->constI) * (shader->num_ci + 1));
149         if(!newarray) {
150             ERR("Failed to grow the constants array\n");
151             return FALSE;
152         }
153         shader->constI = newarray;
154     } else {
155         shader->constI = d3dcompiler_alloc(sizeof(*shader->constI));
156         if(!shader->constI) {
157             ERR("Failed to allocate the constants array\n");
158             return FALSE;
159         }
160     }
161 
162     newconst = d3dcompiler_alloc(sizeof(*newconst));
163     if(!newconst) {
164         ERR("Failed to allocate a new constant\n");
165         return FALSE;
166     }
167     newconst->regnum = reg;
168     newconst->value[0].i = x;
169     newconst->value[1].i = y;
170     newconst->value[2].i = z;
171     newconst->value[3].i = w;
172     shader->constI[shader->num_ci] = newconst;
173 
174     shader->num_ci++;
175     return TRUE;
176 }
177 
178 BOOL add_constB(struct bwriter_shader *shader, DWORD reg, BOOL x) {
179     struct constant *newconst;
180 
181     if(shader->num_cb) {
182         struct constant **newarray;
183         newarray = d3dcompiler_realloc(shader->constB,
184                                sizeof(*shader->constB) * (shader->num_cb + 1));
185         if(!newarray) {
186             ERR("Failed to grow the constants array\n");
187             return FALSE;
188         }
189         shader->constB = newarray;
190     } else {
191         shader->constB = d3dcompiler_alloc(sizeof(*shader->constB));
192         if(!shader->constB) {
193             ERR("Failed to allocate the constants array\n");
194             return FALSE;
195         }
196     }
197 
198     newconst = d3dcompiler_alloc(sizeof(*newconst));
199     if(!newconst) {
200         ERR("Failed to allocate a new constant\n");
201         return FALSE;
202     }
203     newconst->regnum = reg;
204     newconst->value[0].b = x;
205     shader->constB[shader->num_cb] = newconst;
206 
207     shader->num_cb++;
208     return TRUE;
209 }
210 
211 BOOL record_declaration(struct bwriter_shader *shader, DWORD usage,
212                         DWORD usage_idx, DWORD mod, BOOL output,
213                         DWORD regnum, DWORD writemask, BOOL builtin) {
214     unsigned int *num;
215     struct declaration **decl;
216     unsigned int i;
217 
218     if(!shader) return FALSE;
219 
220     if(output) {
221         num = &shader->num_outputs;
222         decl = &shader->outputs;
223     } else {
224         num = &shader->num_inputs;
225         decl = &shader->inputs;
226     }
227 
228     if(*num == 0) {
229         *decl = d3dcompiler_alloc(sizeof(**decl));
230         if(!*decl) {
231             ERR("Error allocating declarations array\n");
232             return FALSE;
233         }
234     } else {
235         struct declaration *newdecl;
236         for(i = 0; i < *num; i++) {
237             if((*decl)[i].regnum == regnum && ((*decl)[i].writemask & writemask)) {
238                 WARN("Declaration of register %u already exists, writemask match 0x%x\n",
239                       regnum, (*decl)[i].writemask & writemask);
240             }
241         }
242 
243         newdecl = d3dcompiler_realloc(*decl,
244                               sizeof(**decl) * ((*num) + 1));
245         if(!newdecl) {
246             ERR("Error reallocating declarations array\n");
247             return FALSE;
248         }
249         *decl = newdecl;
250     }
251     (*decl)[*num].usage = usage;
252     (*decl)[*num].usage_idx = usage_idx;
253     (*decl)[*num].regnum = regnum;
254     (*decl)[*num].mod = mod;
255     (*decl)[*num].writemask = writemask;
256     (*decl)[*num].builtin = builtin;
257     (*num)++;
258 
259     return TRUE;
260 }
261 
262 BOOL record_sampler(struct bwriter_shader *shader, DWORD samptype, DWORD mod, DWORD regnum) {
263     unsigned int i;
264 
265     if(!shader) return FALSE;
266 
267     if(shader->num_samplers == 0) {
268         shader->samplers = d3dcompiler_alloc(sizeof(*shader->samplers));
269         if(!shader->samplers) {
270             ERR("Error allocating samplers array\n");
271             return FALSE;
272         }
273     } else {
274         struct samplerdecl *newarray;
275 
276         for(i = 0; i < shader->num_samplers; i++) {
277             if(shader->samplers[i].regnum == regnum) {
278                 WARN("Sampler %u already declared\n", regnum);
279                 /* This is not an error as far as the assembler is concerned.
280                  * Direct3D might refuse to load the compiled shader though
281                  */
282             }
283         }
284 
285         newarray = d3dcompiler_realloc(shader->samplers,
286                                sizeof(*shader->samplers) * (shader->num_samplers + 1));
287         if(!newarray) {
288             ERR("Error reallocating samplers array\n");
289             return FALSE;
290         }
291         shader->samplers = newarray;
292     }
293 
294     shader->samplers[shader->num_samplers].type = samptype;
295     shader->samplers[shader->num_samplers].mod = mod;
296     shader->samplers[shader->num_samplers].regnum = regnum;
297     shader->num_samplers++;
298     return TRUE;
299 }
300 
301 
302 /* shader bytecode buffer manipulation functions.
303  * allocate_buffer creates a new buffer structure, put_dword adds a new
304  * DWORD to the buffer. In the rare case of a memory allocation failure
305  * when trying to grow the buffer a flag is set in the buffer to mark it
306  * invalid. This avoids return value checking and passing in many places
307  */
308 static struct bytecode_buffer *allocate_buffer(void) {
309     struct bytecode_buffer *ret;
310 
311     ret = d3dcompiler_alloc(sizeof(*ret));
312     if(!ret) return NULL;
313 
314     ret->alloc_size = BYTECODEBUFFER_INITIAL_SIZE;
315     ret->data = d3dcompiler_alloc(sizeof(DWORD) * ret->alloc_size);
316     if(!ret->data) {
317         d3dcompiler_free(ret);
318         return NULL;
319     }
320     ret->state = S_OK;
321     return ret;
322 }
323 
324 static void put_dword(struct bytecode_buffer *buffer, DWORD value) {
325     if(FAILED(buffer->state)) return;
326 
327     if(buffer->alloc_size == buffer->size) {
328         DWORD *newarray;
329         buffer->alloc_size *= 2;
330         newarray = d3dcompiler_realloc(buffer->data,
331                                sizeof(DWORD) * buffer->alloc_size);
332         if(!newarray) {
333             ERR("Failed to grow the buffer data memory\n");
334             buffer->state = E_OUTOFMEMORY;
335             return;
336         }
337         buffer->data = newarray;
338     }
339     buffer->data[buffer->size++] = value;
340 }
341 
342 /* bwriter -> d3d9 conversion functions. */
343 static DWORD d3d9_swizzle(DWORD bwriter_swizzle)
344 {
345     /* Currently a NOP, but this allows changing the internal definitions
346      * without side effects. */
347     DWORD ret = 0;
348 
349     if ((bwriter_swizzle & BWRITERVS_X_X) == BWRITERVS_X_X) ret |= D3DVS_X_X;
350     if ((bwriter_swizzle & BWRITERVS_X_Y) == BWRITERVS_X_Y) ret |= D3DVS_X_Y;
351     if ((bwriter_swizzle & BWRITERVS_X_Z) == BWRITERVS_X_Z) ret |= D3DVS_X_Z;
352     if ((bwriter_swizzle & BWRITERVS_X_W) == BWRITERVS_X_W) ret |= D3DVS_X_W;
353 
354     if ((bwriter_swizzle & BWRITERVS_Y_X) == BWRITERVS_Y_X) ret |= D3DVS_Y_X;
355     if ((bwriter_swizzle & BWRITERVS_Y_Y) == BWRITERVS_Y_Y) ret |= D3DVS_Y_Y;
356     if ((bwriter_swizzle & BWRITERVS_Y_Z) == BWRITERVS_Y_Z) ret |= D3DVS_Y_Z;
357     if ((bwriter_swizzle & BWRITERVS_Y_W) == BWRITERVS_Y_W) ret |= D3DVS_Y_W;
358 
359     if ((bwriter_swizzle & BWRITERVS_Z_X) == BWRITERVS_Z_X) ret |= D3DVS_Z_X;
360     if ((bwriter_swizzle & BWRITERVS_Z_Y) == BWRITERVS_Z_Y) ret |= D3DVS_Z_Y;
361     if ((bwriter_swizzle & BWRITERVS_Z_Z) == BWRITERVS_Z_Z) ret |= D3DVS_Z_Z;
362     if ((bwriter_swizzle & BWRITERVS_Z_W) == BWRITERVS_Z_W) ret |= D3DVS_Z_W;
363 
364     if ((bwriter_swizzle & BWRITERVS_W_X) == BWRITERVS_W_X) ret |= D3DVS_W_X;
365     if ((bwriter_swizzle & BWRITERVS_W_Y) == BWRITERVS_W_Y) ret |= D3DVS_W_Y;
366     if ((bwriter_swizzle & BWRITERVS_W_Z) == BWRITERVS_W_Z) ret |= D3DVS_W_Z;
367     if ((bwriter_swizzle & BWRITERVS_W_W) == BWRITERVS_W_W) ret |= D3DVS_W_W;
368 
369     return ret;
370 }
371 
372 static DWORD d3d9_writemask(DWORD bwriter_writemask)
373 {
374     DWORD ret = 0;
375 
376     if (bwriter_writemask & BWRITERSP_WRITEMASK_0) ret |= D3DSP_WRITEMASK_0;
377     if (bwriter_writemask & BWRITERSP_WRITEMASK_1) ret |= D3DSP_WRITEMASK_1;
378     if (bwriter_writemask & BWRITERSP_WRITEMASK_2) ret |= D3DSP_WRITEMASK_2;
379     if (bwriter_writemask & BWRITERSP_WRITEMASK_3) ret |= D3DSP_WRITEMASK_3;
380 
381     return ret;
382 }
383 
384 static DWORD d3d9_srcmod(DWORD bwriter_srcmod)
385 {
386     switch (bwriter_srcmod)
387     {
388         case BWRITERSPSM_NONE:       return D3DSPSM_NONE;
389         case BWRITERSPSM_NEG:        return D3DSPSM_NEG;
390         case BWRITERSPSM_BIAS:       return D3DSPSM_BIAS;
391         case BWRITERSPSM_BIASNEG:    return D3DSPSM_BIASNEG;
392         case BWRITERSPSM_SIGN:       return D3DSPSM_SIGN;
393         case BWRITERSPSM_SIGNNEG:    return D3DSPSM_SIGNNEG;
394         case BWRITERSPSM_COMP:       return D3DSPSM_COMP;
395         case BWRITERSPSM_X2:         return D3DSPSM_X2;
396         case BWRITERSPSM_X2NEG:      return D3DSPSM_X2NEG;
397         case BWRITERSPSM_DZ:         return D3DSPSM_DZ;
398         case BWRITERSPSM_DW:         return D3DSPSM_DW;
399         case BWRITERSPSM_ABS:        return D3DSPSM_ABS;
400         case BWRITERSPSM_ABSNEG:     return D3DSPSM_ABSNEG;
401         case BWRITERSPSM_NOT:        return D3DSPSM_NOT;
402         default:
403             FIXME("Unhandled BWRITERSPSM token %#x.\n", bwriter_srcmod);
404             return 0;
405     }
406 }
407 
408 static DWORD d3d9_dstmod(DWORD bwriter_mod)
409 {
410     DWORD ret = 0;
411 
412     if (bwriter_mod & BWRITERSPDM_SATURATE)         ret |= D3DSPDM_SATURATE;
413     if (bwriter_mod & BWRITERSPDM_PARTIALPRECISION) ret |= D3DSPDM_PARTIALPRECISION;
414     if (bwriter_mod & BWRITERSPDM_MSAMPCENTROID)    ret |= D3DSPDM_MSAMPCENTROID;
415 
416     return ret;
417 }
418 
419 static DWORD d3d9_comparetype(DWORD asmshader_comparetype)
420 {
421     switch (asmshader_comparetype)
422     {
423         case BWRITER_COMPARISON_GT:     return D3DSPC_GT;
424         case BWRITER_COMPARISON_EQ:     return D3DSPC_EQ;
425         case BWRITER_COMPARISON_GE:     return D3DSPC_GE;
426         case BWRITER_COMPARISON_LT:     return D3DSPC_LT;
427         case BWRITER_COMPARISON_NE:     return D3DSPC_NE;
428         case BWRITER_COMPARISON_LE:     return D3DSPC_LE;
429         default:
430             FIXME("Unexpected BWRITER_COMPARISON type %#x.\n", asmshader_comparetype);
431             return 0;
432     }
433 }
434 
435 static DWORD d3d9_sampler(DWORD bwriter_sampler)
436 {
437     if (bwriter_sampler == BWRITERSTT_UNKNOWN)  return D3DSTT_UNKNOWN;
438     if (bwriter_sampler == BWRITERSTT_1D)       return D3DSTT_1D;
439     if (bwriter_sampler == BWRITERSTT_2D)       return D3DSTT_2D;
440     if (bwriter_sampler == BWRITERSTT_CUBE)     return D3DSTT_CUBE;
441     if (bwriter_sampler == BWRITERSTT_VOLUME)   return D3DSTT_VOLUME;
442     FIXME("Unexpected BWRITERSAMPLER_TEXTURE_TYPE type %#x.\n", bwriter_sampler);
443 
444     return 0;
445 }
446 
447 static DWORD d3d9_register(DWORD bwriter_register)
448 {
449     if (bwriter_register == BWRITERSPR_TEMP)        return D3DSPR_TEMP;
450     if (bwriter_register == BWRITERSPR_INPUT)       return D3DSPR_INPUT;
451     if (bwriter_register == BWRITERSPR_CONST)       return D3DSPR_CONST;
452     if (bwriter_register == BWRITERSPR_ADDR)        return D3DSPR_ADDR;
453     if (bwriter_register == BWRITERSPR_TEXTURE)     return D3DSPR_TEXTURE;
454     if (bwriter_register == BWRITERSPR_RASTOUT)     return D3DSPR_RASTOUT;
455     if (bwriter_register == BWRITERSPR_ATTROUT)     return D3DSPR_ATTROUT;
456     if (bwriter_register == BWRITERSPR_TEXCRDOUT)   return D3DSPR_TEXCRDOUT;
457     if (bwriter_register == BWRITERSPR_OUTPUT)      return D3DSPR_OUTPUT;
458     if (bwriter_register == BWRITERSPR_CONSTINT)    return D3DSPR_CONSTINT;
459     if (bwriter_register == BWRITERSPR_COLOROUT)    return D3DSPR_COLOROUT;
460     if (bwriter_register == BWRITERSPR_DEPTHOUT)    return D3DSPR_DEPTHOUT;
461     if (bwriter_register == BWRITERSPR_SAMPLER)     return D3DSPR_SAMPLER;
462     if (bwriter_register == BWRITERSPR_CONSTBOOL)   return D3DSPR_CONSTBOOL;
463     if (bwriter_register == BWRITERSPR_LOOP)        return D3DSPR_LOOP;
464     if (bwriter_register == BWRITERSPR_MISCTYPE)    return D3DSPR_MISCTYPE;
465     if (bwriter_register == BWRITERSPR_LABEL)       return D3DSPR_LABEL;
466     if (bwriter_register == BWRITERSPR_PREDICATE)   return D3DSPR_PREDICATE;
467 
468     FIXME("Unexpected BWRITERSPR %#x.\n", bwriter_register);
469     return ~0U;
470 }
471 
472 static DWORD d3d9_opcode(DWORD bwriter_opcode)
473 {
474     switch (bwriter_opcode)
475     {
476         case BWRITERSIO_NOP:         return D3DSIO_NOP;
477         case BWRITERSIO_MOV:         return D3DSIO_MOV;
478         case BWRITERSIO_ADD:         return D3DSIO_ADD;
479         case BWRITERSIO_SUB:         return D3DSIO_SUB;
480         case BWRITERSIO_MAD:         return D3DSIO_MAD;
481         case BWRITERSIO_MUL:         return D3DSIO_MUL;
482         case BWRITERSIO_RCP:         return D3DSIO_RCP;
483         case BWRITERSIO_RSQ:         return D3DSIO_RSQ;
484         case BWRITERSIO_DP3:         return D3DSIO_DP3;
485         case BWRITERSIO_DP4:         return D3DSIO_DP4;
486         case BWRITERSIO_MIN:         return D3DSIO_MIN;
487         case BWRITERSIO_MAX:         return D3DSIO_MAX;
488         case BWRITERSIO_SLT:         return D3DSIO_SLT;
489         case BWRITERSIO_SGE:         return D3DSIO_SGE;
490         case BWRITERSIO_EXP:         return D3DSIO_EXP;
491         case BWRITERSIO_LOG:         return D3DSIO_LOG;
492         case BWRITERSIO_LIT:         return D3DSIO_LIT;
493         case BWRITERSIO_DST:         return D3DSIO_DST;
494         case BWRITERSIO_LRP:         return D3DSIO_LRP;
495         case BWRITERSIO_FRC:         return D3DSIO_FRC;
496         case BWRITERSIO_M4x4:        return D3DSIO_M4x4;
497         case BWRITERSIO_M4x3:        return D3DSIO_M4x3;
498         case BWRITERSIO_M3x4:        return D3DSIO_M3x4;
499         case BWRITERSIO_M3x3:        return D3DSIO_M3x3;
500         case BWRITERSIO_M3x2:        return D3DSIO_M3x2;
501         case BWRITERSIO_CALL:        return D3DSIO_CALL;
502         case BWRITERSIO_CALLNZ:      return D3DSIO_CALLNZ;
503         case BWRITERSIO_LOOP:        return D3DSIO_LOOP;
504         case BWRITERSIO_RET:         return D3DSIO_RET;
505         case BWRITERSIO_ENDLOOP:     return D3DSIO_ENDLOOP;
506         case BWRITERSIO_LABEL:       return D3DSIO_LABEL;
507         case BWRITERSIO_DCL:         return D3DSIO_DCL;
508         case BWRITERSIO_POW:         return D3DSIO_POW;
509         case BWRITERSIO_CRS:         return D3DSIO_CRS;
510         case BWRITERSIO_SGN:         return D3DSIO_SGN;
511         case BWRITERSIO_ABS:         return D3DSIO_ABS;
512         case BWRITERSIO_NRM:         return D3DSIO_NRM;
513         case BWRITERSIO_SINCOS:      return D3DSIO_SINCOS;
514         case BWRITERSIO_REP:         return D3DSIO_REP;
515         case BWRITERSIO_ENDREP:      return D3DSIO_ENDREP;
516         case BWRITERSIO_IF:          return D3DSIO_IF;
517         case BWRITERSIO_IFC:         return D3DSIO_IFC;
518         case BWRITERSIO_ELSE:        return D3DSIO_ELSE;
519         case BWRITERSIO_ENDIF:       return D3DSIO_ENDIF;
520         case BWRITERSIO_BREAK:       return D3DSIO_BREAK;
521         case BWRITERSIO_BREAKC:      return D3DSIO_BREAKC;
522         case BWRITERSIO_MOVA:        return D3DSIO_MOVA;
523         case BWRITERSIO_DEFB:        return D3DSIO_DEFB;
524         case BWRITERSIO_DEFI:        return D3DSIO_DEFI;
525 
526         case BWRITERSIO_TEXCOORD:    return D3DSIO_TEXCOORD;
527         case BWRITERSIO_TEXKILL:     return D3DSIO_TEXKILL;
528         case BWRITERSIO_TEX:         return D3DSIO_TEX;
529         case BWRITERSIO_TEXBEM:      return D3DSIO_TEXBEM;
530         case BWRITERSIO_TEXBEML:     return D3DSIO_TEXBEML;
531         case BWRITERSIO_TEXREG2AR:   return D3DSIO_TEXREG2AR;
532         case BWRITERSIO_TEXREG2GB:   return D3DSIO_TEXREG2GB;
533         case BWRITERSIO_TEXM3x2PAD:  return D3DSIO_TEXM3x2PAD;
534         case BWRITERSIO_TEXM3x2TEX:  return D3DSIO_TEXM3x2TEX;
535         case BWRITERSIO_TEXM3x3PAD:  return D3DSIO_TEXM3x3PAD;
536         case BWRITERSIO_TEXM3x3TEX:  return D3DSIO_TEXM3x3TEX;
537         case BWRITERSIO_TEXM3x3SPEC: return D3DSIO_TEXM3x3SPEC;
538         case BWRITERSIO_TEXM3x3VSPEC:return D3DSIO_TEXM3x3VSPEC;
539         case BWRITERSIO_EXPP:        return D3DSIO_EXPP;
540         case BWRITERSIO_LOGP:        return D3DSIO_LOGP;
541         case BWRITERSIO_CND:         return D3DSIO_CND;
542         case BWRITERSIO_DEF:         return D3DSIO_DEF;
543         case BWRITERSIO_TEXREG2RGB:  return D3DSIO_TEXREG2RGB;
544         case BWRITERSIO_TEXDP3TEX:   return D3DSIO_TEXDP3TEX;
545         case BWRITERSIO_TEXM3x2DEPTH:return D3DSIO_TEXM3x2DEPTH;
546         case BWRITERSIO_TEXDP3:      return D3DSIO_TEXDP3;
547         case BWRITERSIO_TEXM3x3:     return D3DSIO_TEXM3x3;
548         case BWRITERSIO_TEXDEPTH:    return D3DSIO_TEXDEPTH;
549         case BWRITERSIO_CMP:         return D3DSIO_CMP;
550         case BWRITERSIO_BEM:         return D3DSIO_BEM;
551         case BWRITERSIO_DP2ADD:      return D3DSIO_DP2ADD;
552         case BWRITERSIO_DSX:         return D3DSIO_DSX;
553         case BWRITERSIO_DSY:         return D3DSIO_DSY;
554         case BWRITERSIO_TEXLDD:      return D3DSIO_TEXLDD;
555         case BWRITERSIO_SETP:        return D3DSIO_SETP;
556         case BWRITERSIO_TEXLDL:      return D3DSIO_TEXLDL;
557         case BWRITERSIO_BREAKP:      return D3DSIO_BREAKP;
558 
559         case BWRITERSIO_PHASE:       return D3DSIO_PHASE;
560         case BWRITERSIO_COMMENT:     return D3DSIO_COMMENT;
561         case BWRITERSIO_END:         return D3DSIO_END;
562 
563         case BWRITERSIO_TEXLDP:      return D3DSIO_TEX | D3DSI_TEXLD_PROJECT;
564         case BWRITERSIO_TEXLDB:      return D3DSIO_TEX | D3DSI_TEXLD_BIAS;
565 
566         default:
567             FIXME("Unhandled BWRITERSIO token %#x.\n", bwriter_opcode);
568             return ~0U;
569     }
570 }
571 
572 static DWORD d3dsp_register( D3DSHADER_PARAM_REGISTER_TYPE type, DWORD num )
573 {
574     return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
575            ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
576            (num & D3DSP_REGNUM_MASK); /* No shift */
577 }
578 
579 /******************************************************
580  * Implementation of the writer functions starts here *
581  ******************************************************/
582 static void write_declarations(struct bc_writer *This,
583                                struct bytecode_buffer *buffer, BOOL len,
584                                const struct declaration *decls, unsigned int num, DWORD type) {
585     DWORD i;
586     DWORD instr_dcl = D3DSIO_DCL;
587     DWORD token;
588     struct shader_reg reg;
589 
590     ZeroMemory(&reg, sizeof(reg));
591 
592     if(len) {
593         instr_dcl |= 2 << D3DSI_INSTLENGTH_SHIFT;
594     }
595 
596     for(i = 0; i < num; i++) {
597         if(decls[i].builtin) continue;
598 
599         /* Write the DCL instruction */
600         put_dword(buffer, instr_dcl);
601 
602         /* Write the usage and index */
603         token = (1u << 31); /* Bit 31 of non-instruction opcodes is 1 */
604         token |= (decls[i].usage << D3DSP_DCL_USAGE_SHIFT) & D3DSP_DCL_USAGE_MASK;
605         token |= (decls[i].usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT) & D3DSP_DCL_USAGEINDEX_MASK;
606         put_dword(buffer, token);
607 
608         /* Write the dest register */
609         reg.type = type;
610         reg.regnum = decls[i].regnum;
611         reg.u.writemask = decls[i].writemask;
612         This->funcs->dstreg(This, &reg, buffer, 0, decls[i].mod);
613     }
614 }
615 
616 static void write_const(struct constant **consts, int num, DWORD opcode, DWORD reg_type, struct bytecode_buffer *buffer, BOOL len) {
617     int i;
618     DWORD instr_def = opcode;
619     const DWORD reg = (1u << 31) | d3dsp_register( reg_type, 0 ) | D3DSP_WRITEMASK_ALL;
620 
621     if(len) {
622         if(opcode == D3DSIO_DEFB)
623             instr_def |= 2 << D3DSI_INSTLENGTH_SHIFT;
624         else
625             instr_def |= 5 << D3DSI_INSTLENGTH_SHIFT;
626     }
627 
628     for(i = 0; i < num; i++) {
629         /* Write the DEF instruction */
630         put_dword(buffer, instr_def);
631 
632         put_dword(buffer, reg | (consts[i]->regnum & D3DSP_REGNUM_MASK));
633         put_dword(buffer, consts[i]->value[0].d);
634         if(opcode != D3DSIO_DEFB) {
635             put_dword(buffer, consts[i]->value[1].d);
636             put_dword(buffer, consts[i]->value[2].d);
637             put_dword(buffer, consts[i]->value[3].d);
638         }
639     }
640 }
641 
642 static void write_constF(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
643     write_const(shader->constF, shader->num_cf, D3DSIO_DEF, D3DSPR_CONST, buffer, len);
644 }
645 
646 /* This function looks for VS 1/2 registers mapping to VS 3 output registers */
647 static HRESULT vs_find_builtin_varyings(struct bc_writer *This, const struct bwriter_shader *shader) {
648     DWORD i;
649     DWORD usage, usage_idx, writemask, regnum;
650 
651     for(i = 0; i < shader->num_outputs; i++) {
652         if(!shader->outputs[i].builtin) continue;
653 
654         usage = shader->outputs[i].usage;
655         usage_idx = shader->outputs[i].usage_idx;
656         writemask = shader->outputs[i].writemask;
657         regnum = shader->outputs[i].regnum;
658 
659         switch(usage) {
660             case BWRITERDECLUSAGE_POSITION:
661             case BWRITERDECLUSAGE_POSITIONT:
662                 if(usage_idx > 0) {
663                     WARN("dcl_position%u not supported in sm 1/2 shaders\n", usage_idx);
664                     return E_INVALIDARG;
665                 }
666                 TRACE("o%u is oPos\n", regnum);
667                 This->oPos_regnum = regnum;
668                 break;
669 
670             case BWRITERDECLUSAGE_COLOR:
671                 if(usage_idx > 1) {
672                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
673                     return E_INVALIDARG;
674                 }
675                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
676                     WARN("Only WRITEMASK_ALL is supported on color in sm 1/2\n");
677                     return E_INVALIDARG;
678                 }
679                 TRACE("o%u is oD%u\n", regnum, usage_idx);
680                 This->oD_regnum[usage_idx] = regnum;
681                 break;
682 
683             case BWRITERDECLUSAGE_TEXCOORD:
684                 if(usage_idx >= 8) {
685                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
686                     return E_INVALIDARG;
687                 }
688                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
689                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
690                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
691                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
692                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
693                     return E_INVALIDARG;
694                 }
695                 TRACE("o%u is oT%u\n", regnum, usage_idx);
696                 This->oT_regnum[usage_idx] = regnum;
697                 break;
698 
699             case BWRITERDECLUSAGE_PSIZE:
700                 if(usage_idx > 0) {
701                     WARN("dcl_psize%u not supported in sm 1/2 shaders\n", usage_idx);
702                     return E_INVALIDARG;
703                 }
704                 TRACE("o%u writemask 0x%08x is oPts\n", regnum, writemask);
705                 This->oPts_regnum = regnum;
706                 This->oPts_mask = writemask;
707                 break;
708 
709             case BWRITERDECLUSAGE_FOG:
710                 if(usage_idx > 0) {
711                     WARN("dcl_fog%u not supported in sm 1 shaders\n", usage_idx);
712                     return E_INVALIDARG;
713                 }
714                 if(writemask != BWRITERSP_WRITEMASK_0 && writemask != BWRITERSP_WRITEMASK_1 &&
715                    writemask != BWRITERSP_WRITEMASK_2 && writemask != BWRITERSP_WRITEMASK_3) {
716                     WARN("Unsupported fog writemask\n");
717                     return E_INVALIDARG;
718                 }
719                 TRACE("o%u writemask 0x%08x is oFog\n", regnum, writemask);
720                 This->oFog_regnum = regnum;
721                 This->oFog_mask = writemask;
722                 break;
723 
724             default:
725                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
726                 return E_INVALIDARG;
727         }
728     }
729 
730     return S_OK;
731 }
732 
733 static void vs_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
734     HRESULT hr;
735 
736     if(shader->num_ci || shader->num_cb) {
737         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
738         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
739         This->state = E_INVALIDARG;
740         return;
741     }
742 
743     hr = vs_find_builtin_varyings(This, shader);
744     if(FAILED(hr)) {
745         This->state = hr;
746         return;
747     }
748 
749     write_declarations(This, buffer, FALSE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
750     write_constF(shader, buffer, FALSE);
751 }
752 
753 static HRESULT find_ps_builtin_semantics(struct bc_writer *This,
754                                          const struct bwriter_shader *shader,
755                                          DWORD texcoords) {
756     DWORD i;
757     DWORD usage, usage_idx, writemask, regnum;
758 
759     This->v_regnum[0] = -1; This->v_regnum[1] = -1;
760     for(i = 0; i < 8; i++) This->t_regnum[i] = -1;
761 
762     for(i = 0; i < shader->num_inputs; i++) {
763         if(!shader->inputs[i].builtin) continue;
764 
765         usage = shader->inputs[i].usage;
766         usage_idx = shader->inputs[i].usage_idx;
767         writemask = shader->inputs[i].writemask;
768         regnum = shader->inputs[i].regnum;
769 
770         switch(usage) {
771             case BWRITERDECLUSAGE_COLOR:
772                 if(usage_idx > 1) {
773                     WARN("dcl_color%u not supported in sm 1 shaders\n", usage_idx);
774                     return E_INVALIDARG;
775                 }
776                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
777                     WARN("Only WRITEMASK_ALL is supported on color in sm 1\n");
778                     return E_INVALIDARG;
779                 }
780                 TRACE("v%u is v%u\n", regnum, usage_idx);
781                 This->v_regnum[usage_idx] = regnum;
782                 break;
783 
784             case BWRITERDECLUSAGE_TEXCOORD:
785                 if(usage_idx > texcoords) {
786                     WARN("dcl_texcoord%u not supported in this shader version\n", usage_idx);
787                     return E_INVALIDARG;
788                 }
789                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
790                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
791                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
792                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
793                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
794                 } else {
795                     writemask = BWRITERSP_WRITEMASK_ALL;
796                 }
797                 TRACE("v%u is t%u\n", regnum, usage_idx);
798                 This->t_regnum[usage_idx] = regnum;
799                 break;
800 
801             default:
802                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
803                 return E_INVALIDARG;
804         }
805     }
806 
807     return S_OK;
808 }
809 
810 static void ps_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
811     HRESULT hr;
812 
813     /* First check the constants and varyings, and complain if unsupported things are used */
814     if(shader->num_ci || shader->num_cb) {
815         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
816         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
817         This->state = E_INVALIDARG;
818         return;
819     }
820 
821     hr = find_ps_builtin_semantics(This, shader, 4);
822     if(FAILED(hr)) {
823         This->state = hr;
824         return;
825     }
826 
827     write_constF(shader, buffer, FALSE);
828 }
829 
830 static void ps_1_4_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
831     HRESULT hr;
832 
833     /* First check the constants and varyings, and complain if unsupported things are used */
834     if(shader->num_ci || shader->num_cb) {
835         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
836         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
837         This->state = E_INVALIDARG;
838         return;
839     }
840     hr = find_ps_builtin_semantics(This, shader, 6);
841     if(FAILED(hr)) {
842         This->state = hr;
843         return;
844     }
845 
846     write_constF(shader, buffer, FALSE);
847 }
848 
849 static void end(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
850     put_dword(buffer, D3DSIO_END);
851 }
852 
853 static DWORD map_vs_output(struct bc_writer *This, DWORD regnum, DWORD mask, DWORD *has_components) {
854     DWORD i;
855 
856     *has_components = TRUE;
857     if(regnum == This->oPos_regnum) {
858         return d3dsp_register( D3DSPR_RASTOUT, D3DSRO_POSITION );
859     }
860     if(regnum == This->oFog_regnum && mask == This->oFog_mask) {
861         *has_components = FALSE;
862         return d3dsp_register( D3DSPR_RASTOUT, D3DSRO_FOG ) | D3DSP_WRITEMASK_ALL;
863     }
864     if(regnum == This->oPts_regnum && mask == This->oPts_mask) {
865         *has_components = FALSE;
866         return d3dsp_register( D3DSPR_RASTOUT, D3DSRO_POINT_SIZE ) | D3DSP_WRITEMASK_ALL;
867     }
868     for(i = 0; i < 2; i++) {
869         if(regnum == This->oD_regnum[i]) {
870             return d3dsp_register( D3DSPR_ATTROUT, i );
871         }
872     }
873     for(i = 0; i < 8; i++) {
874         if(regnum == This->oT_regnum[i]) {
875             return d3dsp_register( D3DSPR_TEXCRDOUT, i );
876         }
877     }
878 
879     /* The varying must be undeclared - if an unsupported varying was declared,
880      * the vs_find_builtin_varyings function would have caught it and this code
881      * would not run */
882     WARN("Undeclared varying %u\n", regnum);
883     This->state = E_INVALIDARG;
884     return -1;
885 }
886 
887 static void vs_12_dstreg(struct bc_writer *This, const struct shader_reg *reg,
888                          struct bytecode_buffer *buffer,
889                          DWORD shift, DWORD mod) {
890     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
891     DWORD has_wmask;
892 
893     if(reg->rel_reg) {
894         WARN("Relative addressing not supported for destination registers\n");
895         This->state = E_INVALIDARG;
896         return;
897     }
898 
899     switch(reg->type) {
900         case BWRITERSPR_OUTPUT:
901             token |= map_vs_output(This, reg->regnum, reg->u.writemask, &has_wmask);
902             break;
903 
904         case BWRITERSPR_RASTOUT:
905         case BWRITERSPR_ATTROUT:
906             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
907             * but are unexpected. If we hit this path it might be due to an error.
908             */
909             FIXME("Unexpected register type %u\n", reg->type);
910             /* drop through */
911         case BWRITERSPR_INPUT:
912         case BWRITERSPR_TEMP:
913         case BWRITERSPR_CONST:
914             token |= d3dsp_register( reg->type, reg->regnum );
915             has_wmask = TRUE;
916             break;
917 
918         case BWRITERSPR_ADDR:
919             if(reg->regnum != 0) {
920                 WARN("Only a0 exists\n");
921                 This->state = E_INVALIDARG;
922                 return;
923             }
924             token |= d3dsp_register( D3DSPR_ADDR, 0 );
925             has_wmask = TRUE;
926             break;
927 
928         case BWRITERSPR_PREDICATE:
929             if(This->version != BWRITERVS_VERSION(2, 1)){
930                 WARN("Predicate register is allowed only in vs_2_x\n");
931                 This->state = E_INVALIDARG;
932                 return;
933             }
934             if(reg->regnum != 0) {
935                 WARN("Only predicate register p0 exists\n");
936                 This->state = E_INVALIDARG;
937                 return;
938             }
939             token |= d3dsp_register( D3DSPR_PREDICATE, 0 );
940             has_wmask = TRUE;
941             break;
942 
943         default:
944             WARN("Invalid register type for 1.x-2.x vertex shader\n");
945             This->state = E_INVALIDARG;
946             return;
947     }
948 
949     /* strictly speaking there are no modifiers in vs_2_0 and vs_1_x, but they can be written
950      * into the bytecode and since the compiler doesn't do such checks write them
951      * (the checks are done by the undocumented shader validator)
952      */
953     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
954     token |= d3d9_dstmod(mod);
955 
956     if(has_wmask) {
957         token |= d3d9_writemask(reg->u.writemask);
958     }
959     put_dword(buffer, token);
960 }
961 
962 static void vs_1_x_srcreg(struct bc_writer *This, const struct shader_reg *reg,
963                           struct bytecode_buffer *buffer) {
964     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
965     DWORD has_swizzle;
966     DWORD component;
967 
968     switch(reg->type) {
969         case BWRITERSPR_OUTPUT:
970             /* Map the swizzle to a writemask, the format expected
971                by map_vs_output
972              */
973             switch(reg->u.swizzle) {
974                 case BWRITERVS_SWIZZLE_X:
975                     component = BWRITERSP_WRITEMASK_0;
976                     break;
977                 case BWRITERVS_SWIZZLE_Y:
978                     component = BWRITERSP_WRITEMASK_1;
979                     break;
980                 case BWRITERVS_SWIZZLE_Z:
981                     component = BWRITERSP_WRITEMASK_2;
982                     break;
983                 case BWRITERVS_SWIZZLE_W:
984                     component = BWRITERSP_WRITEMASK_3;
985                     break;
986                 default:
987                     component = 0;
988             }
989             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
990             break;
991 
992         case BWRITERSPR_RASTOUT:
993         case BWRITERSPR_ATTROUT:
994             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
995              * but are unexpected. If we hit this path it might be due to an error.
996              */
997             FIXME("Unexpected register type %u\n", reg->type);
998             /* drop through */
999         case BWRITERSPR_INPUT:
1000         case BWRITERSPR_TEMP:
1001         case BWRITERSPR_CONST:
1002         case BWRITERSPR_ADDR:
1003             token |= d3dsp_register( reg->type, reg->regnum );
1004             if(reg->rel_reg) {
1005                 if(reg->rel_reg->type != BWRITERSPR_ADDR ||
1006                    reg->rel_reg->regnum != 0 ||
1007                    reg->rel_reg->u.swizzle != BWRITERVS_SWIZZLE_X) {
1008                     WARN("Relative addressing in vs_1_x is only allowed with a0.x\n");
1009                     This->state = E_INVALIDARG;
1010                     return;
1011                 }
1012                 token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1013             }
1014             break;
1015 
1016         default:
1017             WARN("Invalid register type for 1.x vshader\n");
1018             This->state = E_INVALIDARG;
1019             return;
1020     }
1021 
1022     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1023 
1024     token |= d3d9_srcmod(reg->srcmod);
1025     put_dword(buffer, token);
1026 }
1027 
1028 static void write_srcregs(struct bc_writer *This, const struct instruction *instr,
1029                           struct bytecode_buffer *buffer){
1030     unsigned int i;
1031     if(instr->has_predicate){
1032         This->funcs->srcreg(This, &instr->predicate, buffer);
1033     }
1034     for(i = 0; i < instr->num_srcs; i++){
1035         This->funcs->srcreg(This, &instr->src[i], buffer);
1036     }
1037 }
1038 
1039 static DWORD map_ps13_temp(struct bc_writer *This, const struct shader_reg *reg) {
1040     if(reg->regnum == T0_REG) {
1041         return d3dsp_register( D3DSPR_TEXTURE, 0 );
1042     } else if(reg->regnum == T1_REG) {
1043         return d3dsp_register( D3DSPR_TEXTURE, 1 );
1044     } else if(reg->regnum == T2_REG) {
1045         return d3dsp_register( D3DSPR_TEXTURE, 2 );
1046     } else if(reg->regnum == T3_REG) {
1047         return d3dsp_register( D3DSPR_TEXTURE, 3 );
1048     } else {
1049         return d3dsp_register( D3DSPR_TEMP, reg->regnum );
1050     }
1051 }
1052 
1053 static DWORD map_ps_input(struct bc_writer *This,
1054                           const struct shader_reg *reg) {
1055     DWORD i;
1056     /* Map color interpolators */
1057     for(i = 0; i < 2; i++) {
1058         if(reg->regnum == This->v_regnum[i]) {
1059             return d3dsp_register( D3DSPR_INPUT, i );
1060         }
1061     }
1062     for(i = 0; i < 8; i++) {
1063         if(reg->regnum == This->t_regnum[i]) {
1064             return d3dsp_register( D3DSPR_TEXTURE, i );
1065         }
1066     }
1067 
1068     WARN("Invalid ps 1/2 varying\n");
1069     This->state = E_INVALIDARG;
1070     return 0;
1071 }
1072 
1073 static void ps_1_0123_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1074                              struct bytecode_buffer *buffer) {
1075     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1076     if(reg->rel_reg) {
1077         WARN("Relative addressing not supported in <= ps_3_0\n");
1078         This->state = E_INVALIDARG;
1079         return;
1080     }
1081 
1082     switch(reg->type) {
1083         case BWRITERSPR_INPUT:
1084             token |= map_ps_input(This, reg);
1085             break;
1086 
1087             /* Take care about the texture temporaries. There's a problem: They aren't
1088              * declared anywhere, so we can only hardcode the values that are used
1089              * to map ps_1_3 shaders to the common shader structure
1090              */
1091         case BWRITERSPR_TEMP:
1092             token |= map_ps13_temp(This, reg);
1093             break;
1094 
1095         case BWRITERSPR_CONST: /* Can be mapped 1:1 */
1096             token |= d3dsp_register( reg->type, reg->regnum );
1097             break;
1098 
1099         default:
1100             WARN("Invalid register type for <= ps_1_3 shader\n");
1101             This->state = E_INVALIDARG;
1102             return;
1103     }
1104 
1105     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1106 
1107     if(reg->srcmod == BWRITERSPSM_DZ || reg->srcmod == BWRITERSPSM_DW ||
1108        reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1109        reg->srcmod == BWRITERSPSM_NOT) {
1110         WARN("Invalid source modifier %u for <= ps_1_3\n", reg->srcmod);
1111         This->state = E_INVALIDARG;
1112         return;
1113     }
1114     token |= d3d9_srcmod(reg->srcmod);
1115     put_dword(buffer, token);
1116 }
1117 
1118 static void ps_1_0123_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1119                              struct bytecode_buffer *buffer,
1120                              DWORD shift, DWORD mod) {
1121     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1122 
1123     if(reg->rel_reg) {
1124         WARN("Relative addressing not supported for destination registers\n");
1125         This->state = E_INVALIDARG;
1126         return;
1127     }
1128 
1129     switch(reg->type) {
1130         case BWRITERSPR_TEMP:
1131             token |= map_ps13_temp(This, reg);
1132             break;
1133 
1134         /* texkill uses the input register as a destination parameter */
1135         case BWRITERSPR_INPUT:
1136             token |= map_ps_input(This, reg);
1137             break;
1138 
1139         default:
1140             WARN("Invalid dest register type for 1.x pshader\n");
1141             This->state = E_INVALIDARG;
1142             return;
1143     }
1144 
1145     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1146     token |= d3d9_dstmod(mod);
1147 
1148     token |= d3d9_writemask(reg->u.writemask);
1149     put_dword(buffer, token);
1150 }
1151 
1152 /* The length of an instruction consists of the destination register (if any),
1153  * the number of source registers, the number of address registers used for
1154  * indirect addressing, and optionally the predicate register
1155  */
1156 static DWORD instrlen(const struct instruction *instr, unsigned int srcs, unsigned int dsts) {
1157     unsigned int i;
1158     DWORD ret = srcs + dsts + (instr->has_predicate ? 1 : 0);
1159 
1160     if(dsts){
1161         if(instr->dst.rel_reg) ret++;
1162     }
1163     for(i = 0; i < srcs; i++) {
1164         if(instr->src[i].rel_reg) ret++;
1165     }
1166     return ret;
1167 }
1168 
1169 static void sm_1_x_opcode(struct bc_writer *This,
1170                           const struct instruction *instr,
1171                           DWORD token, struct bytecode_buffer *buffer) {
1172     /* In sm_1_x instruction length isn't encoded */
1173     if(instr->coissue){
1174         token |= D3DSI_COISSUE;
1175     }
1176     put_dword(buffer, token);
1177 }
1178 
1179 static void instr_handler(struct bc_writer *This,
1180                           const struct instruction *instr,
1181                           struct bytecode_buffer *buffer) {
1182     DWORD token = d3d9_opcode(instr->opcode);
1183 
1184     This->funcs->opcode(This, instr, token, buffer);
1185     if(instr->has_dst) This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1186     write_srcregs(This, instr, buffer);
1187 }
1188 
/* Instruction table for vs_1_x: pairs each accepted opcode with the
 * function that writes it. Every opcode listed here is written by the
 * generic instr_handler.
 */
static const struct instr_handler_table vs_1_x_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_SLT,            instr_handler},
    {BWRITERSIO_SGE,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_DST,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_LIT,            instr_handler},

    {BWRITERSIO_END,            NULL}, /* Sentinel value, it signals
                                          the end of the list */
};
1220 
/* Backend used for vs_1_x shaders. The initializer is positional, so the
 * entries have to follow the member order of struct bytecode_backend, which
 * is declared outside of this file section.
 */
static const struct bytecode_backend vs_1_x_backend = {
    vs_1_x_header,      /* header writer */
    end,                /* writes the D3DSIO_END token */
    vs_1_x_srcreg,      /* source register writer */
    vs_12_dstreg,       /* destination register writer */
    sm_1_x_opcode,      /* opcode token writer */
    vs_1_x_handlers     /* instruction table */
};
1229 
1230 static void instr_ps_1_0123_texld(struct bc_writer *This,
1231                                   const struct instruction *instr,
1232                                   struct bytecode_buffer *buffer) {
1233     DWORD idx;
1234     struct shader_reg reg;
1235     DWORD swizzlemask;
1236 
1237     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1238        instr->src[1].regnum > 3) {
1239         WARN("Unsupported sampler type %u regnum %u\n",
1240              instr->src[1].type, instr->src[1].regnum);
1241         This->state = E_INVALIDARG;
1242         return;
1243     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1244         WARN("Can only sample into a temp register\n");
1245         This->state = E_INVALIDARG;
1246         return;
1247     }
1248 
1249     idx = instr->src[1].regnum;
1250     if((idx == 0 && instr->dst.regnum != T0_REG) ||
1251        (idx == 1 && instr->dst.regnum != T1_REG) ||
1252        (idx == 2 && instr->dst.regnum != T2_REG) ||
1253        (idx == 3 && instr->dst.regnum != T3_REG)) {
1254         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_x\n",
1255              idx, instr->dst.regnum);
1256         This->state = E_INVALIDARG;
1257         return;
1258     }
1259     if(instr->src[0].type == BWRITERSPR_INPUT) {
1260         /* A simple non-dependent read tex instruction */
1261         if(instr->src[0].regnum != This->t_regnum[idx]) {
1262             WARN("Cannot sample from s%u with texture address data from interpolator %u\n",
1263                  idx, instr->src[0].regnum);
1264             This->state = E_INVALIDARG;
1265             return;
1266         }
1267         This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1268 
1269         /* map the temp dstreg to the ps_1_3 texture temporary register */
1270         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1271     } else if(instr->src[0].type == BWRITERSPR_TEMP) {
1272 
1273         swizzlemask = (3 << BWRITERVS_SWIZZLE_SHIFT) |
1274             (3 << (BWRITERVS_SWIZZLE_SHIFT + 2)) |
1275             (3 << (BWRITERVS_SWIZZLE_SHIFT + 4));
1276         if((instr->src[0].u.swizzle & swizzlemask) == (BWRITERVS_X_X | BWRITERVS_Y_Y | BWRITERVS_Z_Z)) {
1277             TRACE("writing texreg2rgb\n");
1278             This->funcs->opcode(This, instr, D3DSIO_TEXREG2RGB & D3DSI_OPCODE_MASK, buffer);
1279         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_W | BWRITERVS_Y_X | BWRITERVS_Z_X | BWRITERVS_W_X)) {
1280             TRACE("writing texreg2ar\n");
1281             This->funcs->opcode(This, instr, D3DSIO_TEXREG2AR & D3DSI_OPCODE_MASK, buffer);
1282         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_Y | BWRITERVS_Y_Z | BWRITERVS_Z_Z | BWRITERVS_W_Z)) {
1283             TRACE("writing texreg2gb\n");
1284             This->funcs->opcode(This, instr, D3DSIO_TEXREG2GB & D3DSI_OPCODE_MASK, buffer);
1285         } else {
1286             WARN("Unsupported src addr swizzle in dependent texld: 0x%08x\n", instr->src[0].u.swizzle);
1287             This->state = E_INVALIDARG;
1288             return;
1289         }
1290 
1291         /* Dst and src reg can be mapped normally. Both registers are temporary registers in the
1292          * source shader and have to be mapped to the temporary form of the texture registers. However,
1293          * the src reg doesn't have a swizzle
1294          */
1295         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1296         reg = instr->src[0];
1297         reg.u.swizzle = BWRITERVS_NOSWIZZLE;
1298         This->funcs->srcreg(This, &reg, buffer);
1299     } else {
1300         WARN("Invalid address data source register\n");
1301         This->state = E_INVALIDARG;
1302         return;
1303     }
1304 }
1305 
1306 static void instr_ps_1_0123_mov(struct bc_writer *This,
1307                                 const struct instruction *instr,
1308                                 struct bytecode_buffer *buffer) {
1309     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1310 
1311     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1312         if((instr->dst.regnum == T0_REG && instr->src[0].regnum == This->t_regnum[0]) ||
1313            (instr->dst.regnum == T1_REG && instr->src[0].regnum == This->t_regnum[1]) ||
1314            (instr->dst.regnum == T2_REG && instr->src[0].regnum == This->t_regnum[2]) ||
1315            (instr->dst.regnum == T3_REG && instr->src[0].regnum == This->t_regnum[3])) {
1316             if(instr->dstmod & BWRITERSPDM_SATURATE) {
1317                 This->funcs->opcode(This, instr, D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK, buffer);
1318                 /* Remove the SATURATE flag, it's implicit to the instruction */
1319                 This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod & (~BWRITERSPDM_SATURATE));
1320                 return;
1321             } else {
1322                 WARN("A varying -> temp copy is only supported with the SATURATE modifier in <=ps_1_3\n");
1323                 This->state = E_INVALIDARG;
1324                 return;
1325             }
1326         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1327                   instr->src[0].regnum == This->v_regnum[1]) {
1328             /* Handled by the normal mov below. Just drop out of the if condition */
1329         } else {
1330             WARN("Unsupported varying -> temp mov in <= ps_1_3\n");
1331             This->state = E_INVALIDARG;
1332             return;
1333         }
1334     }
1335 
1336     This->funcs->opcode(This, instr, token, buffer);
1337     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1338     This->funcs->srcreg(This, &instr->src[0], buffer);
1339 }
1340 
/* Instruction table for ps_1_0 - ps_1_3: pairs each accepted opcode with
 * the function that writes it. MOV and TEX have dedicated handlers, all
 * other opcodes are written by the generic instr_handler. The
 * BWRITERSIO_END entry with a NULL handler closes the list.
 */
static const struct instr_handler_table ps_1_0123_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_ps_1_0123_mov},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},

    /* pshader instructions */
    {BWRITERSIO_CND,            instr_handler},
    {BWRITERSIO_CMP,            instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},
    {BWRITERSIO_TEX,            instr_ps_1_0123_texld},
    {BWRITERSIO_TEXBEM,         instr_handler},
    {BWRITERSIO_TEXBEML,        instr_handler},
    {BWRITERSIO_TEXM3x2PAD,     instr_handler},
    {BWRITERSIO_TEXM3x3PAD,     instr_handler},
    {BWRITERSIO_TEXM3x3SPEC,    instr_handler},
    {BWRITERSIO_TEXM3x3VSPEC,   instr_handler},
    {BWRITERSIO_TEXM3x3TEX,     instr_handler},
    {BWRITERSIO_TEXM3x3,        instr_handler},
    {BWRITERSIO_TEXM3x2DEPTH,   instr_handler},
    {BWRITERSIO_TEXM3x2TEX,     instr_handler},
    {BWRITERSIO_TEXDP3,         instr_handler},
    {BWRITERSIO_TEXDP3TEX,      instr_handler},
    {BWRITERSIO_END,            NULL},
};
1371 
/* Backend used for ps_1_0 - ps_1_3 shaders. The initializer is positional,
 * so the entries have to follow the member order of struct
 * bytecode_backend, which is declared outside of this file section.
 */
static const struct bytecode_backend ps_1_0123_backend = {
    ps_1_x_header,      /* header writer */
    end,                /* writes the D3DSIO_END token */
    ps_1_0123_srcreg,   /* source register writer */
    ps_1_0123_dstreg,   /* destination register writer */
    sm_1_x_opcode,      /* opcode token writer */
    ps_1_0123_handlers  /* instruction table */
};
1380 
1381 static void ps_1_4_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1382                           struct bytecode_buffer *buffer) {
1383     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1384     if(reg->rel_reg) {
1385         WARN("Relative addressing not supported in <= ps_3_0\n");
1386         This->state = E_INVALIDARG;
1387         return;
1388     }
1389 
1390     switch(reg->type) {
1391         case BWRITERSPR_INPUT:
1392             token |= map_ps_input(This, reg);
1393             break;
1394 
1395         /* Can be mapped 1:1 */
1396         case BWRITERSPR_TEMP:
1397         case BWRITERSPR_CONST:
1398             token |= d3dsp_register( reg->type, reg->regnum );
1399             break;
1400 
1401         default:
1402             WARN("Invalid register type for ps_1_4 shader\n");
1403             This->state = E_INVALIDARG;
1404             return;
1405     }
1406 
1407     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1408 
1409     if(reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1410        reg->srcmod == BWRITERSPSM_NOT) {
1411         WARN("Invalid source modifier %u for ps_1_4\n", reg->srcmod);
1412         This->state = E_INVALIDARG;
1413         return;
1414     }
1415     token |= d3d9_srcmod(reg->srcmod);
1416     put_dword(buffer, token);
1417 }
1418 
1419 static void ps_1_4_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1420                           struct bytecode_buffer *buffer,
1421                           DWORD shift, DWORD mod) {
1422     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1423 
1424     if(reg->rel_reg) {
1425         WARN("Relative addressing not supported for destination registers\n");
1426         This->state = E_INVALIDARG;
1427         return;
1428     }
1429 
1430     switch(reg->type) {
1431         case BWRITERSPR_TEMP: /* 1:1 mapping */
1432             token |= d3dsp_register( reg->type, reg->regnum );
1433             break;
1434 
1435 	/* For texkill */
1436         case BWRITERSPR_INPUT:
1437             token |= map_ps_input(This, reg);
1438             break;
1439 
1440         default:
1441             WARN("Invalid dest register type for 1.x pshader\n");
1442             This->state = E_INVALIDARG;
1443             return;
1444     }
1445 
1446     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1447     token |= d3d9_dstmod(mod);
1448 
1449     token |= d3d9_writemask(reg->u.writemask);
1450     put_dword(buffer, token);
1451 }
1452 
1453 static void instr_ps_1_4_mov(struct bc_writer *This,
1454                              const struct instruction *instr,
1455                              struct bytecode_buffer *buffer) {
1456     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1457 
1458     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1459         if(instr->src[0].regnum == This->t_regnum[0] ||
1460            instr->src[0].regnum == This->t_regnum[1] ||
1461            instr->src[0].regnum == This->t_regnum[2] ||
1462            instr->src[0].regnum == This->t_regnum[3] ||
1463            instr->src[0].regnum == This->t_regnum[4] ||
1464            instr->src[0].regnum == This->t_regnum[5]) {
1465             /* Similar to a regular mov, but a different opcode */
1466             token = D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK;
1467         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1468                   instr->src[0].regnum == This->v_regnum[1]) {
1469             /* Handled by the normal mov below. Just drop out of the if condition */
1470         } else {
1471             WARN("Unsupported varying -> temp mov in ps_1_4\n");
1472             This->state = E_INVALIDARG;
1473             return;
1474         }
1475     }
1476 
1477     This->funcs->opcode(This, instr, token, buffer);
1478     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1479     This->funcs->srcreg(This, &instr->src[0], buffer);
1480 }
1481 
1482 static void instr_ps_1_4_texld(struct bc_writer *This,
1483                                const struct instruction *instr,
1484                                struct bytecode_buffer *buffer) {
1485     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1486        instr->src[1].regnum > 5) {
1487         WARN("Unsupported sampler type %u regnum %u\n",
1488              instr->src[1].type, instr->src[1].regnum);
1489         This->state = E_INVALIDARG;
1490         return;
1491     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1492         WARN("Can only sample into a temp register\n");
1493         This->state = E_INVALIDARG;
1494         return;
1495     }
1496 
1497     if(instr->src[1].regnum != instr->dst.regnum) {
1498         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_4\n",
1499              instr->src[1].regnum, instr->dst.regnum);
1500         This->state = E_INVALIDARG;
1501         return;
1502     }
1503 
1504     This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1505     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1506     This->funcs->srcreg(This, &instr->src[0], buffer);
1507 }
1508 
/* Instruction table for ps_1_4: pairs each accepted opcode with the
 * function that writes it. MOV and TEX have dedicated handlers, all other
 * opcodes are written by the generic instr_handler. The BWRITERSIO_END
 * entry with a NULL handler closes the list.
 */
static const struct instr_handler_table ps_1_4_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_ps_1_4_mov},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},

    /* pshader instructions */
    {BWRITERSIO_CND,            instr_handler},
    {BWRITERSIO_CMP,            instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},
    {BWRITERSIO_TEX,            instr_ps_1_4_texld},
    {BWRITERSIO_TEXDEPTH,       instr_handler},
    {BWRITERSIO_BEM,            instr_handler},

    {BWRITERSIO_PHASE,          instr_handler},
    {BWRITERSIO_END,            NULL},
};
1531 
1532 static const struct bytecode_backend ps_1_4_backend = {
1533     ps_1_4_header,
1534     end,
1535     ps_1_4_srcreg,
1536     ps_1_4_dstreg,
1537     sm_1_x_opcode,
1538     ps_1_4_handlers
1539 };
1540 
1541 static void write_constB(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1542     write_const(shader->constB, shader->num_cb, D3DSIO_DEFB, D3DSPR_CONSTBOOL, buffer, len);
1543 }
1544 
1545 static void write_constI(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1546     write_const(shader->constI, shader->num_ci, D3DSIO_DEFI, D3DSPR_CONSTINT, buffer, len);
1547 }
1548 
1549 static void vs_2_header(struct bc_writer *This,
1550                         const struct bwriter_shader *shader,
1551                         struct bytecode_buffer *buffer) {
1552     HRESULT hr;
1553 
1554     hr = vs_find_builtin_varyings(This, shader);
1555     if(FAILED(hr)) {
1556         This->state = hr;
1557         return;
1558     }
1559 
1560     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1561     write_constF(shader, buffer, TRUE);
1562     write_constB(shader, buffer, TRUE);
1563     write_constI(shader, buffer, TRUE);
1564 }
1565 
1566 static void vs_2_srcreg(struct bc_writer *This,
1567                         const struct shader_reg *reg,
1568                         struct bytecode_buffer *buffer) {
1569     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1570     DWORD has_swizzle;
1571     DWORD component;
1572     DWORD d3d9reg;
1573 
1574     switch(reg->type) {
1575         case BWRITERSPR_OUTPUT:
1576             /* Map the swizzle to a writemask, the format expected
1577                by map_vs_output
1578              */
1579             switch(reg->u.swizzle) {
1580                 case BWRITERVS_SWIZZLE_X:
1581                     component = BWRITERSP_WRITEMASK_0;
1582                     break;
1583                 case BWRITERVS_SWIZZLE_Y:
1584                     component = BWRITERSP_WRITEMASK_1;
1585                     break;
1586                 case BWRITERVS_SWIZZLE_Z:
1587                     component = BWRITERSP_WRITEMASK_2;
1588                     break;
1589                 case BWRITERVS_SWIZZLE_W:
1590                     component = BWRITERSP_WRITEMASK_3;
1591                     break;
1592                 default:
1593                     component = 0;
1594             }
1595             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
1596             break;
1597 
1598         case BWRITERSPR_RASTOUT:
1599         case BWRITERSPR_ATTROUT:
1600             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
1601              * but are unexpected. If we hit this path it might be due to an error.
1602              */
1603             FIXME("Unexpected register type %u\n", reg->type);
1604             /* drop through */
1605         case BWRITERSPR_INPUT:
1606         case BWRITERSPR_TEMP:
1607         case BWRITERSPR_CONST:
1608         case BWRITERSPR_ADDR:
1609         case BWRITERSPR_CONSTINT:
1610         case BWRITERSPR_CONSTBOOL:
1611         case BWRITERSPR_LABEL:
1612             d3d9reg = d3d9_register(reg->type);
1613             token |= d3dsp_register( d3d9reg, reg->regnum );
1614             break;
1615 
1616         case BWRITERSPR_LOOP:
1617             if(reg->regnum != 0) {
1618                 WARN("Only regnum 0 is supported for the loop index register in vs_2_0\n");
1619                 This->state = E_INVALIDARG;
1620                 return;
1621             }
1622             token |= d3dsp_register( D3DSPR_LOOP, 0 );
1623             break;
1624 
1625         case BWRITERSPR_PREDICATE:
1626             if(This->version != BWRITERVS_VERSION(2, 1)){
1627                 WARN("Predicate register is allowed only in vs_2_x\n");
1628                 This->state = E_INVALIDARG;
1629                 return;
1630             }
1631             if(reg->regnum > 0) {
1632                 WARN("Only predicate register 0 is supported\n");
1633                 This->state = E_INVALIDARG;
1634                 return;
1635             }
1636             token |= d3dsp_register( D3DSPR_PREDICATE, 0 );
1637             break;
1638 
1639         default:
1640             WARN("Invalid register type for 2.0 vshader\n");
1641             This->state = E_INVALIDARG;
1642             return;
1643     }
1644 
1645     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1646 
1647     token |= d3d9_srcmod(reg->srcmod);
1648 
1649     if(reg->rel_reg)
1650         token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1651 
1652     put_dword(buffer, token);
1653 
1654     /* vs_2_0 and newer write the register containing the index explicitly in the
1655      * binary code
1656      */
1657     if(token & D3DVS_ADDRMODE_RELATIVE)
1658         vs_2_srcreg(This, reg->rel_reg, buffer);
1659 }
1660 
1661 static void sm_2_opcode(struct bc_writer *This,
1662                         const struct instruction *instr,
1663                         DWORD token, struct bytecode_buffer *buffer) {
1664     /* From sm 2 onwards instruction length is encoded in the opcode field */
1665     int dsts = instr->has_dst ? 1 : 0;
1666     token |= instrlen(instr, instr->num_srcs, dsts) << D3DSI_INSTLENGTH_SHIFT;
1667     if(instr->comptype)
1668         token |= (d3d9_comparetype(instr->comptype) << 16) & (0xf << 16);
1669     if(instr->has_predicate)
1670         token |= D3DSHADER_INSTRUCTION_PREDICATED;
1671     put_dword(buffer,token);
1672 }
1673 
1674 static const struct instr_handler_table vs_2_0_handlers[] = {
1675     {BWRITERSIO_ADD,            instr_handler},
1676     {BWRITERSIO_NOP,            instr_handler},
1677     {BWRITERSIO_MOV,            instr_handler},
1678     {BWRITERSIO_SUB,            instr_handler},
1679     {BWRITERSIO_MAD,            instr_handler},
1680     {BWRITERSIO_MUL,            instr_handler},
1681     {BWRITERSIO_RCP,            instr_handler},
1682     {BWRITERSIO_RSQ,            instr_handler},
1683     {BWRITERSIO_DP3,            instr_handler},
1684     {BWRITERSIO_DP4,            instr_handler},
1685     {BWRITERSIO_MIN,            instr_handler},
1686     {BWRITERSIO_MAX,            instr_handler},
1687     {BWRITERSIO_SLT,            instr_handler},
1688     {BWRITERSIO_SGE,            instr_handler},
1689     {BWRITERSIO_ABS,            instr_handler},
1690     {BWRITERSIO_EXP,            instr_handler},
1691     {BWRITERSIO_LOG,            instr_handler},
1692     {BWRITERSIO_EXPP,           instr_handler},
1693     {BWRITERSIO_LOGP,           instr_handler},
1694     {BWRITERSIO_DST,            instr_handler},
1695     {BWRITERSIO_LRP,            instr_handler},
1696     {BWRITERSIO_FRC,            instr_handler},
1697     {BWRITERSIO_CRS,            instr_handler},
1698     {BWRITERSIO_SGN,            instr_handler},
1699     {BWRITERSIO_NRM,            instr_handler},
1700     {BWRITERSIO_SINCOS,         instr_handler},
1701     {BWRITERSIO_M4x4,           instr_handler},
1702     {BWRITERSIO_M4x3,           instr_handler},
1703     {BWRITERSIO_M3x4,           instr_handler},
1704     {BWRITERSIO_M3x3,           instr_handler},
1705     {BWRITERSIO_M3x2,           instr_handler},
1706     {BWRITERSIO_LIT,            instr_handler},
1707     {BWRITERSIO_POW,            instr_handler},
1708     {BWRITERSIO_MOVA,           instr_handler},
1709 
1710     {BWRITERSIO_CALL,           instr_handler},
1711     {BWRITERSIO_CALLNZ,         instr_handler},
1712     {BWRITERSIO_REP,            instr_handler},
1713     {BWRITERSIO_ENDREP,         instr_handler},
1714     {BWRITERSIO_IF,             instr_handler},
1715     {BWRITERSIO_LABEL,          instr_handler},
1716     {BWRITERSIO_ELSE,           instr_handler},
1717     {BWRITERSIO_ENDIF,          instr_handler},
1718     {BWRITERSIO_LOOP,           instr_handler},
1719     {BWRITERSIO_RET,            instr_handler},
1720     {BWRITERSIO_ENDLOOP,        instr_handler},
1721 
1722     {BWRITERSIO_END,            NULL},
1723 };
1724 
1725 static const struct bytecode_backend vs_2_0_backend = {
1726     vs_2_header,
1727     end,
1728     vs_2_srcreg,
1729     vs_12_dstreg,
1730     sm_2_opcode,
1731     vs_2_0_handlers
1732 };
1733 
1734 static const struct instr_handler_table vs_2_x_handlers[] = {
1735     {BWRITERSIO_ADD,            instr_handler},
1736     {BWRITERSIO_NOP,            instr_handler},
1737     {BWRITERSIO_MOV,            instr_handler},
1738     {BWRITERSIO_SUB,            instr_handler},
1739     {BWRITERSIO_MAD,            instr_handler},
1740     {BWRITERSIO_MUL,            instr_handler},
1741     {BWRITERSIO_RCP,            instr_handler},
1742     {BWRITERSIO_RSQ,            instr_handler},
1743     {BWRITERSIO_DP3,            instr_handler},
1744     {BWRITERSIO_DP4,            instr_handler},
1745     {BWRITERSIO_MIN,            instr_handler},
1746     {BWRITERSIO_MAX,            instr_handler},
1747     {BWRITERSIO_SLT,            instr_handler},
1748     {BWRITERSIO_SGE,            instr_handler},
1749     {BWRITERSIO_ABS,            instr_handler},
1750     {BWRITERSIO_EXP,            instr_handler},
1751     {BWRITERSIO_LOG,            instr_handler},
1752     {BWRITERSIO_EXPP,           instr_handler},
1753     {BWRITERSIO_LOGP,           instr_handler},
1754     {BWRITERSIO_DST,            instr_handler},
1755     {BWRITERSIO_LRP,            instr_handler},
1756     {BWRITERSIO_FRC,            instr_handler},
1757     {BWRITERSIO_CRS,            instr_handler},
1758     {BWRITERSIO_SGN,            instr_handler},
1759     {BWRITERSIO_NRM,            instr_handler},
1760     {BWRITERSIO_SINCOS,         instr_handler},
1761     {BWRITERSIO_M4x4,           instr_handler},
1762     {BWRITERSIO_M4x3,           instr_handler},
1763     {BWRITERSIO_M3x4,           instr_handler},
1764     {BWRITERSIO_M3x3,           instr_handler},
1765     {BWRITERSIO_M3x2,           instr_handler},
1766     {BWRITERSIO_LIT,            instr_handler},
1767     {BWRITERSIO_POW,            instr_handler},
1768     {BWRITERSIO_MOVA,           instr_handler},
1769 
1770     {BWRITERSIO_CALL,           instr_handler},
1771     {BWRITERSIO_CALLNZ,         instr_handler},
1772     {BWRITERSIO_REP,            instr_handler},
1773     {BWRITERSIO_ENDREP,         instr_handler},
1774     {BWRITERSIO_IF,             instr_handler},
1775     {BWRITERSIO_LABEL,          instr_handler},
1776     {BWRITERSIO_IFC,            instr_handler},
1777     {BWRITERSIO_ELSE,           instr_handler},
1778     {BWRITERSIO_ENDIF,          instr_handler},
1779     {BWRITERSIO_BREAK,          instr_handler},
1780     {BWRITERSIO_BREAKC,         instr_handler},
1781     {BWRITERSIO_LOOP,           instr_handler},
1782     {BWRITERSIO_RET,            instr_handler},
1783     {BWRITERSIO_ENDLOOP,        instr_handler},
1784 
1785     {BWRITERSIO_SETP,           instr_handler},
1786     {BWRITERSIO_BREAKP,         instr_handler},
1787 
1788     {BWRITERSIO_END,            NULL},
1789 };
1790 
1791 static const struct bytecode_backend vs_2_x_backend = {
1792     vs_2_header,
1793     end,
1794     vs_2_srcreg,
1795     vs_12_dstreg,
1796     sm_2_opcode,
1797     vs_2_x_handlers
1798 };
1799 
1800 static void write_samplers(const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1801     DWORD i;
1802     DWORD instr_dcl = D3DSIO_DCL | (2 << D3DSI_INSTLENGTH_SHIFT);
1803     DWORD token;
1804     const DWORD reg = (1u << 31) | d3dsp_register( D3DSPR_SAMPLER, 0 ) | D3DSP_WRITEMASK_ALL;
1805 
1806     for(i = 0; i < shader->num_samplers; i++) {
1807         /* Write the DCL instruction */
1808         put_dword(buffer, instr_dcl);
1809         token = (1u << 31);
1810         /* Already shifted */
1811         token |= (d3d9_sampler(shader->samplers[i].type)) & D3DSP_TEXTURETYPE_MASK;
1812         put_dword(buffer, token);
1813         token = reg | (shader->samplers[i].regnum & D3DSP_REGNUM_MASK);
1814         token |= d3d9_dstmod(shader->samplers[i].mod);
1815         put_dword(buffer, token);
1816     }
1817 }
1818 
1819 static void ps_2_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1820     HRESULT hr = find_ps_builtin_semantics(This, shader, 8);
1821     if(FAILED(hr)) {
1822         This->state = hr;
1823         return;
1824     }
1825 
1826     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1827     write_samplers(shader, buffer);
1828     write_constF(shader, buffer, TRUE);
1829     write_constB(shader, buffer, TRUE);
1830     write_constI(shader, buffer, TRUE);
1831 }
1832 
1833 static void ps_2_srcreg(struct bc_writer *This,
1834                         const struct shader_reg *reg,
1835                         struct bytecode_buffer *buffer) {
1836     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1837     DWORD d3d9reg;
1838     if(reg->rel_reg) {
1839         WARN("Relative addressing not supported in <= ps_3_0\n");
1840         This->state = E_INVALIDARG;
1841         return;
1842     }
1843 
1844     switch(reg->type) {
1845         case BWRITERSPR_INPUT:
1846             token |= map_ps_input(This, reg);
1847             break;
1848 
1849             /* Can be mapped 1:1 */
1850         case BWRITERSPR_TEMP:
1851         case BWRITERSPR_CONST:
1852         case BWRITERSPR_COLOROUT:
1853         case BWRITERSPR_CONSTBOOL:
1854         case BWRITERSPR_CONSTINT:
1855         case BWRITERSPR_SAMPLER:
1856         case BWRITERSPR_LABEL:
1857         case BWRITERSPR_DEPTHOUT:
1858             d3d9reg = d3d9_register(reg->type);
1859             token |= d3dsp_register( d3d9reg, reg->regnum );
1860             break;
1861 
1862         case BWRITERSPR_PREDICATE:
1863             if(This->version != BWRITERPS_VERSION(2, 1)){
1864                 WARN("Predicate register not supported in ps_2_0\n");
1865                 This->state = E_INVALIDARG;
1866             }
1867             if(reg->regnum) {
1868                 WARN("Predicate register with regnum %u not supported\n",
1869                      reg->regnum);
1870                 This->state = E_INVALIDARG;
1871             }
1872             token |= d3dsp_register( D3DSPR_PREDICATE, 0 );
1873             break;
1874 
1875         default:
1876             WARN("Invalid register type for ps_2_0 shader\n");
1877             This->state = E_INVALIDARG;
1878             return;
1879     }
1880 
1881     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1882 
1883     token |= d3d9_srcmod(reg->srcmod);
1884     put_dword(buffer, token);
1885 }
1886 
1887 static void ps_2_0_dstreg(struct bc_writer *This,
1888                           const struct shader_reg *reg,
1889                           struct bytecode_buffer *buffer,
1890                           DWORD shift, DWORD mod) {
1891     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1892     DWORD d3d9reg;
1893 
1894     if(reg->rel_reg) {
1895         WARN("Relative addressing not supported for destination registers\n");
1896         This->state = E_INVALIDARG;
1897         return;
1898     }
1899 
1900     switch(reg->type) {
1901         case BWRITERSPR_TEMP: /* 1:1 mapping */
1902         case BWRITERSPR_COLOROUT:
1903         case BWRITERSPR_DEPTHOUT:
1904             d3d9reg = d3d9_register(reg->type);
1905             token |= d3dsp_register( d3d9reg, reg->regnum );
1906             break;
1907 
1908         case BWRITERSPR_PREDICATE:
1909             if(This->version != BWRITERPS_VERSION(2, 1)){
1910                 WARN("Predicate register not supported in ps_2_0\n");
1911                 This->state = E_INVALIDARG;
1912             }
1913             token |= d3dsp_register( D3DSPR_PREDICATE, reg->regnum );
1914             break;
1915 
1916 	/* texkill uses the input register as a destination parameter */
1917         case BWRITERSPR_INPUT:
1918             token |= map_ps_input(This, reg);
1919             break;
1920 
1921         default:
1922             WARN("Invalid dest register type for 2.x pshader\n");
1923             This->state = E_INVALIDARG;
1924             return;
1925     }
1926 
1927     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1928     token |= d3d9_dstmod(mod);
1929 
1930     token |= d3d9_writemask(reg->u.writemask);
1931     put_dword(buffer, token);
1932 }
1933 
1934 static const struct instr_handler_table ps_2_0_handlers[] = {
1935     {BWRITERSIO_ADD,            instr_handler},
1936     {BWRITERSIO_NOP,            instr_handler},
1937     {BWRITERSIO_MOV,            instr_handler},
1938     {BWRITERSIO_SUB,            instr_handler},
1939     {BWRITERSIO_MAD,            instr_handler},
1940     {BWRITERSIO_MUL,            instr_handler},
1941     {BWRITERSIO_RCP,            instr_handler},
1942     {BWRITERSIO_RSQ,            instr_handler},
1943     {BWRITERSIO_DP3,            instr_handler},
1944     {BWRITERSIO_DP4,            instr_handler},
1945     {BWRITERSIO_MIN,            instr_handler},
1946     {BWRITERSIO_MAX,            instr_handler},
1947     {BWRITERSIO_ABS,            instr_handler},
1948     {BWRITERSIO_EXP,            instr_handler},
1949     {BWRITERSIO_LOG,            instr_handler},
1950     {BWRITERSIO_EXPP,           instr_handler},
1951     {BWRITERSIO_LOGP,           instr_handler},
1952     {BWRITERSIO_LRP,            instr_handler},
1953     {BWRITERSIO_FRC,            instr_handler},
1954     {BWRITERSIO_CRS,            instr_handler},
1955     {BWRITERSIO_NRM,            instr_handler},
1956     {BWRITERSIO_SINCOS,         instr_handler},
1957     {BWRITERSIO_M4x4,           instr_handler},
1958     {BWRITERSIO_M4x3,           instr_handler},
1959     {BWRITERSIO_M3x4,           instr_handler},
1960     {BWRITERSIO_M3x3,           instr_handler},
1961     {BWRITERSIO_M3x2,           instr_handler},
1962     {BWRITERSIO_POW,            instr_handler},
1963     {BWRITERSIO_DP2ADD,         instr_handler},
1964     {BWRITERSIO_CMP,            instr_handler},
1965 
1966     {BWRITERSIO_TEX,            instr_handler},
1967     {BWRITERSIO_TEXLDP,         instr_handler},
1968     {BWRITERSIO_TEXLDB,         instr_handler},
1969     {BWRITERSIO_TEXKILL,        instr_handler},
1970 
1971     {BWRITERSIO_END,            NULL},
1972 };
1973 
1974 static const struct bytecode_backend ps_2_0_backend = {
1975     ps_2_header,
1976     end,
1977     ps_2_srcreg,
1978     ps_2_0_dstreg,
1979     sm_2_opcode,
1980     ps_2_0_handlers
1981 };
1982 
1983 static const struct instr_handler_table ps_2_x_handlers[] = {
1984     {BWRITERSIO_ADD,            instr_handler},
1985     {BWRITERSIO_NOP,            instr_handler},
1986     {BWRITERSIO_MOV,            instr_handler},
1987     {BWRITERSIO_SUB,            instr_handler},
1988     {BWRITERSIO_MAD,            instr_handler},
1989     {BWRITERSIO_MUL,            instr_handler},
1990     {BWRITERSIO_RCP,            instr_handler},
1991     {BWRITERSIO_RSQ,            instr_handler},
1992     {BWRITERSIO_DP3,            instr_handler},
1993     {BWRITERSIO_DP4,            instr_handler},
1994     {BWRITERSIO_MIN,            instr_handler},
1995     {BWRITERSIO_MAX,            instr_handler},
1996     {BWRITERSIO_ABS,            instr_handler},
1997     {BWRITERSIO_EXP,            instr_handler},
1998     {BWRITERSIO_LOG,            instr_handler},
1999     {BWRITERSIO_EXPP,           instr_handler},
2000     {BWRITERSIO_LOGP,           instr_handler},
2001     {BWRITERSIO_LRP,            instr_handler},
2002     {BWRITERSIO_FRC,            instr_handler},
2003     {BWRITERSIO_CRS,            instr_handler},
2004     {BWRITERSIO_NRM,            instr_handler},
2005     {BWRITERSIO_SINCOS,         instr_handler},
2006     {BWRITERSIO_M4x4,           instr_handler},
2007     {BWRITERSIO_M4x3,           instr_handler},
2008     {BWRITERSIO_M3x4,           instr_handler},
2009     {BWRITERSIO_M3x3,           instr_handler},
2010     {BWRITERSIO_M3x2,           instr_handler},
2011     {BWRITERSIO_POW,            instr_handler},
2012     {BWRITERSIO_DP2ADD,         instr_handler},
2013     {BWRITERSIO_CMP,            instr_handler},
2014 
2015     {BWRITERSIO_CALL,           instr_handler},
2016     {BWRITERSIO_CALLNZ,         instr_handler},
2017     {BWRITERSIO_REP,            instr_handler},
2018     {BWRITERSIO_ENDREP,         instr_handler},
2019     {BWRITERSIO_IF,             instr_handler},
2020     {BWRITERSIO_LABEL,          instr_handler},
2021     {BWRITERSIO_IFC,            instr_handler},
2022     {BWRITERSIO_ELSE,           instr_handler},
2023     {BWRITERSIO_ENDIF,          instr_handler},
2024     {BWRITERSIO_BREAK,          instr_handler},
2025     {BWRITERSIO_BREAKC,         instr_handler},
2026     {BWRITERSIO_RET,            instr_handler},
2027 
2028     {BWRITERSIO_TEX,            instr_handler},
2029     {BWRITERSIO_TEXLDP,         instr_handler},
2030     {BWRITERSIO_TEXLDB,         instr_handler},
2031     {BWRITERSIO_TEXKILL,        instr_handler},
2032     {BWRITERSIO_DSX,            instr_handler},
2033     {BWRITERSIO_DSY,            instr_handler},
2034 
2035     {BWRITERSIO_SETP,           instr_handler},
2036     {BWRITERSIO_BREAKP,         instr_handler},
2037 
2038     {BWRITERSIO_TEXLDD,         instr_handler},
2039 
2040     {BWRITERSIO_END,            NULL},
2041 };
2042 
2043 static const struct bytecode_backend ps_2_x_backend = {
2044     ps_2_header,
2045     end,
2046     ps_2_srcreg,
2047     ps_2_0_dstreg,
2048     sm_2_opcode,
2049     ps_2_x_handlers
2050 };
2051 
2052 static void sm_3_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
2053     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
2054     write_declarations(This, buffer, TRUE, shader->outputs, shader->num_outputs, BWRITERSPR_OUTPUT);
2055     write_constF(shader, buffer, TRUE);
2056     write_constB(shader, buffer, TRUE);
2057     write_constI(shader, buffer, TRUE);
2058     write_samplers(shader, buffer);
2059 }
2060 
2061 static void sm_3_srcreg(struct bc_writer *This,
2062                         const struct shader_reg *reg,
2063                         struct bytecode_buffer *buffer) {
2064     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
2065     DWORD d3d9reg;
2066 
2067     d3d9reg = d3d9_register(reg->type);
2068     token |= d3dsp_register( d3d9reg, reg->regnum );
2069     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK;
2070     token |= d3d9_srcmod(reg->srcmod);
2071 
2072     if(reg->rel_reg) {
2073         if(reg->type == BWRITERSPR_CONST && This->version == BWRITERPS_VERSION(3, 0)) {
2074             WARN("c%u[...] is unsupported in ps_3_0\n", reg->regnum);
2075             This->state = E_INVALIDARG;
2076             return;
2077         }
2078         if(((reg->rel_reg->type == BWRITERSPR_ADDR && This->version == BWRITERVS_VERSION(3, 0)) ||
2079            reg->rel_reg->type == BWRITERSPR_LOOP) &&
2080            reg->rel_reg->regnum == 0) {
2081             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
2082         } else {
2083             WARN("Unsupported relative addressing register\n");
2084             This->state = E_INVALIDARG;
2085             return;
2086         }
2087     }
2088 
2089     put_dword(buffer, token);
2090 
2091     /* vs_2_0 and newer write the register containing the index explicitly in the
2092      * binary code
2093      */
2094     if(token & D3DVS_ADDRMODE_RELATIVE) {
2095         sm_3_srcreg(This, reg->rel_reg, buffer);
2096     }
2097 }
2098 
2099 static void sm_3_dstreg(struct bc_writer *This,
2100                         const struct shader_reg *reg,
2101                         struct bytecode_buffer *buffer,
2102                         DWORD shift, DWORD mod) {
2103     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
2104     DWORD d3d9reg;
2105 
2106     if(reg->rel_reg) {
2107         if(This->version == BWRITERVS_VERSION(3, 0) &&
2108            reg->type == BWRITERSPR_OUTPUT) {
2109             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
2110         } else {
2111             WARN("Relative addressing not supported for this shader type or register type\n");
2112             This->state = E_INVALIDARG;
2113             return;
2114         }
2115     }
2116 
2117     d3d9reg = d3d9_register(reg->type);
2118     token |= d3dsp_register( d3d9reg, reg->regnum );
2119     token |= d3d9_dstmod(mod);
2120     token |= d3d9_writemask(reg->u.writemask);
2121     put_dword(buffer, token);
2122 
2123     /* vs_2_0 and newer write the register containing the index explicitly in the
2124      * binary code
2125      */
2126     if(token & D3DVS_ADDRMODE_RELATIVE) {
2127         sm_3_srcreg(This, reg->rel_reg, buffer);
2128     }
2129 }
2130 
2131 static const struct instr_handler_table vs_3_handlers[] = {
2132     {BWRITERSIO_ADD,            instr_handler},
2133     {BWRITERSIO_NOP,            instr_handler},
2134     {BWRITERSIO_MOV,            instr_handler},
2135     {BWRITERSIO_SUB,            instr_handler},
2136     {BWRITERSIO_MAD,            instr_handler},
2137     {BWRITERSIO_MUL,            instr_handler},
2138     {BWRITERSIO_RCP,            instr_handler},
2139     {BWRITERSIO_RSQ,            instr_handler},
2140     {BWRITERSIO_DP3,            instr_handler},
2141     {BWRITERSIO_DP4,            instr_handler},
2142     {BWRITERSIO_MIN,            instr_handler},
2143     {BWRITERSIO_MAX,            instr_handler},
2144     {BWRITERSIO_SLT,            instr_handler},
2145     {BWRITERSIO_SGE,            instr_handler},
2146     {BWRITERSIO_ABS,            instr_handler},
2147     {BWRITERSIO_EXP,            instr_handler},
2148     {BWRITERSIO_LOG,            instr_handler},
2149     {BWRITERSIO_EXPP,           instr_handler},
2150     {BWRITERSIO_LOGP,           instr_handler},
2151     {BWRITERSIO_DST,            instr_handler},
2152     {BWRITERSIO_LRP,            instr_handler},
2153     {BWRITERSIO_FRC,            instr_handler},
2154     {BWRITERSIO_CRS,            instr_handler},
2155     {BWRITERSIO_SGN,            instr_handler},
2156     {BWRITERSIO_NRM,            instr_handler},
2157     {BWRITERSIO_SINCOS,         instr_handler},
2158     {BWRITERSIO_M4x4,           instr_handler},
2159     {BWRITERSIO_M4x3,           instr_handler},
2160     {BWRITERSIO_M3x4,           instr_handler},
2161     {BWRITERSIO_M3x3,           instr_handler},
2162     {BWRITERSIO_M3x2,           instr_handler},
2163     {BWRITERSIO_LIT,            instr_handler},
2164     {BWRITERSIO_POW,            instr_handler},
2165     {BWRITERSIO_MOVA,           instr_handler},
2166 
2167     {BWRITERSIO_CALL,           instr_handler},
2168     {BWRITERSIO_CALLNZ,         instr_handler},
2169     {BWRITERSIO_REP,            instr_handler},
2170     {BWRITERSIO_ENDREP,         instr_handler},
2171     {BWRITERSIO_IF,             instr_handler},
2172     {BWRITERSIO_LABEL,          instr_handler},
2173     {BWRITERSIO_IFC,            instr_handler},
2174     {BWRITERSIO_ELSE,           instr_handler},
2175     {BWRITERSIO_ENDIF,          instr_handler},
2176     {BWRITERSIO_BREAK,          instr_handler},
2177     {BWRITERSIO_BREAKC,         instr_handler},
2178     {BWRITERSIO_LOOP,           instr_handler},
2179     {BWRITERSIO_RET,            instr_handler},
2180     {BWRITERSIO_ENDLOOP,        instr_handler},
2181 
2182     {BWRITERSIO_SETP,           instr_handler},
2183     {BWRITERSIO_BREAKP,         instr_handler},
2184     {BWRITERSIO_TEXLDL,         instr_handler},
2185 
2186     {BWRITERSIO_END,            NULL},
2187 };
2188 
2189 static const struct bytecode_backend vs_3_backend = {
2190     sm_3_header,
2191     end,
2192     sm_3_srcreg,
2193     sm_3_dstreg,
2194     sm_2_opcode,
2195     vs_3_handlers
2196 };
2197 
2198 static const struct instr_handler_table ps_3_handlers[] = {
2199     {BWRITERSIO_ADD,            instr_handler},
2200     {BWRITERSIO_NOP,            instr_handler},
2201     {BWRITERSIO_MOV,            instr_handler},
2202     {BWRITERSIO_SUB,            instr_handler},
2203     {BWRITERSIO_MAD,            instr_handler},
2204     {BWRITERSIO_MUL,            instr_handler},
2205     {BWRITERSIO_RCP,            instr_handler},
2206     {BWRITERSIO_RSQ,            instr_handler},
2207     {BWRITERSIO_DP3,            instr_handler},
2208     {BWRITERSIO_DP4,            instr_handler},
2209     {BWRITERSIO_MIN,            instr_handler},
2210     {BWRITERSIO_MAX,            instr_handler},
2211     {BWRITERSIO_ABS,            instr_handler},
2212     {BWRITERSIO_EXP,            instr_handler},
2213     {BWRITERSIO_LOG,            instr_handler},
2214     {BWRITERSIO_EXPP,           instr_handler},
2215     {BWRITERSIO_LOGP,           instr_handler},
2216     {BWRITERSIO_LRP,            instr_handler},
2217     {BWRITERSIO_FRC,            instr_handler},
2218     {BWRITERSIO_CRS,            instr_handler},
2219     {BWRITERSIO_NRM,            instr_handler},
2220     {BWRITERSIO_SINCOS,         instr_handler},
2221     {BWRITERSIO_M4x4,           instr_handler},
2222     {BWRITERSIO_M4x3,           instr_handler},
2223     {BWRITERSIO_M3x4,           instr_handler},
2224     {BWRITERSIO_M3x3,           instr_handler},
2225     {BWRITERSIO_M3x2,           instr_handler},
2226     {BWRITERSIO_POW,            instr_handler},
2227     {BWRITERSIO_DP2ADD,         instr_handler},
2228     {BWRITERSIO_CMP,            instr_handler},
2229 
2230     {BWRITERSIO_CALL,           instr_handler},
2231     {BWRITERSIO_CALLNZ,         instr_handler},
2232     {BWRITERSIO_REP,            instr_handler},
2233     {BWRITERSIO_ENDREP,         instr_handler},
2234     {BWRITERSIO_IF,             instr_handler},
2235     {BWRITERSIO_LABEL,          instr_handler},
2236     {BWRITERSIO_IFC,            instr_handler},
2237     {BWRITERSIO_ELSE,           instr_handler},
2238     {BWRITERSIO_ENDIF,          instr_handler},
2239     {BWRITERSIO_BREAK,          instr_handler},
2240     {BWRITERSIO_BREAKC,         instr_handler},
2241     {BWRITERSIO_LOOP,           instr_handler},
2242     {BWRITERSIO_RET,            instr_handler},
2243     {BWRITERSIO_ENDLOOP,        instr_handler},
2244 
2245     {BWRITERSIO_SETP,           instr_handler},
2246     {BWRITERSIO_BREAKP,         instr_handler},
2247     {BWRITERSIO_TEXLDL,         instr_handler},
2248 
2249     {BWRITERSIO_TEX,            instr_handler},
2250     {BWRITERSIO_TEXLDP,         instr_handler},
2251     {BWRITERSIO_TEXLDB,         instr_handler},
2252     {BWRITERSIO_TEXKILL,        instr_handler},
2253     {BWRITERSIO_DSX,            instr_handler},
2254     {BWRITERSIO_DSY,            instr_handler},
2255     {BWRITERSIO_TEXLDD,         instr_handler},
2256 
2257     {BWRITERSIO_END,            NULL},
2258 };
2259 
2260 static const struct bytecode_backend ps_3_backend = {
2261     sm_3_header,
2262     end,
2263     sm_3_srcreg,
2264     sm_3_dstreg,
2265     sm_2_opcode,
2266     ps_3_handlers
2267 };
2268 
2269 static void init_vs10_dx9_writer(struct bc_writer *writer) {
2270     TRACE("Creating DirectX9 vertex shader 1.0 writer\n");
2271     writer->funcs = &vs_1_x_backend;
2272 }
2273 
2274 static void init_vs11_dx9_writer(struct bc_writer *writer) {
2275     TRACE("Creating DirectX9 vertex shader 1.1 writer\n");
2276     writer->funcs = &vs_1_x_backend;
2277 }
2278 
2279 static void init_vs20_dx9_writer(struct bc_writer *writer) {
2280     TRACE("Creating DirectX9 vertex shader 2.0 writer\n");
2281     writer->funcs = &vs_2_0_backend;
2282 }
2283 
2284 static void init_vs2x_dx9_writer(struct bc_writer *writer) {
2285     TRACE("Creating DirectX9 vertex shader 2.x writer\n");
2286     writer->funcs = &vs_2_x_backend;
2287 }
2288 
2289 static void init_vs30_dx9_writer(struct bc_writer *writer) {
2290     TRACE("Creating DirectX9 vertex shader 3.0 writer\n");
2291     writer->funcs = &vs_3_backend;
2292 }
2293 
2294 static void init_ps10_dx9_writer(struct bc_writer *writer) {
2295     TRACE("Creating DirectX9 pixel shader 1.0 writer\n");
2296     writer->funcs = &ps_1_0123_backend;
2297 }
2298 
2299 static void init_ps11_dx9_writer(struct bc_writer *writer) {
2300     TRACE("Creating DirectX9 pixel shader 1.1 writer\n");
2301     writer->funcs = &ps_1_0123_backend;
2302 }
2303 
2304 static void init_ps12_dx9_writer(struct bc_writer *writer) {
2305     TRACE("Creating DirectX9 pixel shader 1.2 writer\n");
2306     writer->funcs = &ps_1_0123_backend;
2307 }
2308 
2309 static void init_ps13_dx9_writer(struct bc_writer *writer) {
2310     TRACE("Creating DirectX9 pixel shader 1.3 writer\n");
2311     writer->funcs = &ps_1_0123_backend;
2312 }
2313 
2314 static void init_ps14_dx9_writer(struct bc_writer *writer) {
2315     TRACE("Creating DirectX9 pixel shader 1.4 writer\n");
2316     writer->funcs = &ps_1_4_backend;
2317 }
2318 
2319 static void init_ps20_dx9_writer(struct bc_writer *writer) {
2320     TRACE("Creating DirectX9 pixel shader 2.0 writer\n");
2321     writer->funcs = &ps_2_0_backend;
2322 }
2323 
2324 static void init_ps2x_dx9_writer(struct bc_writer *writer) {
2325     TRACE("Creating DirectX9 pixel shader 2.x writer\n");
2326     writer->funcs = &ps_2_x_backend;
2327 }
2328 
2329 static void init_ps30_dx9_writer(struct bc_writer *writer) {
2330     TRACE("Creating DirectX9 pixel shader 3.0 writer\n");
2331     writer->funcs = &ps_3_backend;
2332 }
2333 
2334 static struct bc_writer *create_writer(DWORD version, DWORD dxversion) {
2335     struct bc_writer *ret = d3dcompiler_alloc(sizeof(*ret));
2336 
2337     if(!ret) {
2338         WARN("Failed to allocate a bytecode writer instance\n");
2339         return NULL;
2340     }
2341 
2342     switch(version) {
2343         case BWRITERVS_VERSION(1, 0):
2344             if(dxversion != 9) {
2345                 WARN("Unsupported dxversion for vertex shader 1.0 requested: %u\n", dxversion);
2346                 goto fail;
2347             }
2348             init_vs10_dx9_writer(ret);
2349             break;
2350         case BWRITERVS_VERSION(1, 1):
2351             if(dxversion != 9) {
2352                 WARN("Unsupported dxversion for vertex shader 1.1 requested: %u\n", dxversion);
2353                 goto fail;
2354             }
2355             init_vs11_dx9_writer(ret);
2356             break;
2357         case BWRITERVS_VERSION(2, 0):
2358             if(dxversion != 9) {
2359                 WARN("Unsupported dxversion for vertex shader 2.0 requested: %u\n", dxversion);
2360                 goto fail;
2361             }
2362             init_vs20_dx9_writer(ret);
2363             break;
2364         case BWRITERVS_VERSION(2, 1):
2365             if(dxversion != 9) {
2366                 WARN("Unsupported dxversion for vertex shader 2.x requested: %u\n", dxversion);
2367                 goto fail;
2368             }
2369             init_vs2x_dx9_writer(ret);
2370             break;
2371         case BWRITERVS_VERSION(3, 0):
2372             if(dxversion != 9) {
2373                 WARN("Unsupported dxversion for vertex shader 3.0 requested: %u\n", dxversion);
2374                 goto fail;
2375             }
2376             init_vs30_dx9_writer(ret);
2377             break;
2378 
2379         case BWRITERPS_VERSION(1, 0):
2380             if(dxversion != 9) {
2381                 WARN("Unsupported dxversion for pixel shader 1.0 requested: %u\n", dxversion);
2382                 goto fail;
2383             }
2384             init_ps10_dx9_writer(ret);
2385             break;
2386         case BWRITERPS_VERSION(1, 1):
2387             if(dxversion != 9) {
2388                 WARN("Unsupported dxversion for pixel shader 1.1 requested: %u\n", dxversion);
2389                 goto fail;
2390             }
2391             init_ps11_dx9_writer(ret);
2392             break;
2393         case BWRITERPS_VERSION(1, 2):
2394             if(dxversion != 9) {
2395                 WARN("Unsupported dxversion for pixel shader 1.2 requested: %u\n", dxversion);
2396                 goto fail;
2397             }
2398             init_ps12_dx9_writer(ret);
2399             break;
2400         case BWRITERPS_VERSION(1, 3):
2401             if(dxversion != 9) {
2402                 WARN("Unsupported dxversion for pixel shader 1.3 requested: %u\n", dxversion);
2403                 goto fail;
2404             }
2405             init_ps13_dx9_writer(ret);
2406             break;
2407         case BWRITERPS_VERSION(1, 4):
2408             if(dxversion != 9) {
2409                 WARN("Unsupported dxversion for pixel shader 1.4 requested: %u\n", dxversion);
2410                 goto fail;
2411             }
2412             init_ps14_dx9_writer(ret);
2413             break;
2414 
2415         case BWRITERPS_VERSION(2, 0):
2416             if(dxversion != 9) {
2417                 WARN("Unsupported dxversion for pixel shader 2.0 requested: %u\n", dxversion);
2418                 goto fail;
2419             }
2420             init_ps20_dx9_writer(ret);
2421             break;
2422 
2423         case BWRITERPS_VERSION(2, 1):
2424             if(dxversion != 9) {
2425                 WARN("Unsupported dxversion for pixel shader 2.x requested: %u\n", dxversion);
2426                 goto fail;
2427             }
2428             init_ps2x_dx9_writer(ret);
2429             break;
2430 
2431         case BWRITERPS_VERSION(3, 0):
2432             if(dxversion != 9) {
2433                 WARN("Unsupported dxversion for pixel shader 3.0 requested: %u\n", dxversion);
2434                 goto fail;
2435             }
2436             init_ps30_dx9_writer(ret);
2437             break;
2438 
2439         default:
2440             WARN("Unexpected shader version requested: %08x\n", version);
2441             goto fail;
2442     }
2443     ret->version = version;
2444     return ret;
2445 
2446 fail:
2447     d3dcompiler_free(ret);
2448     return NULL;
2449 }
2450 
2451 static HRESULT call_instr_handler(struct bc_writer *writer,
2452                                   const struct instruction *instr,
2453                                   struct bytecode_buffer *buffer) {
2454     DWORD i=0;
2455 
2456     while(writer->funcs->instructions[i].opcode != BWRITERSIO_END) {
2457         if(instr->opcode == writer->funcs->instructions[i].opcode) {
2458             if(!writer->funcs->instructions[i].func) {
2459                 WARN("Opcode %u not supported by this profile\n", instr->opcode);
2460                 return E_INVALIDARG;
2461             }
2462             writer->funcs->instructions[i].func(writer, instr, buffer);
2463             return S_OK;
2464         }
2465         i++;
2466     }
2467 
2468     FIXME("Unhandled instruction %u - %s\n", instr->opcode,
2469           debug_print_opcode(instr->opcode));
2470     return E_INVALIDARG;
2471 }
2472 
2473 HRESULT SlWriteBytecode(const struct bwriter_shader *shader, int dxversion, DWORD **result, DWORD *size)
2474 {
2475     struct bc_writer *writer;
2476     struct bytecode_buffer *buffer = NULL;
2477     HRESULT hr;
2478     unsigned int i;
2479 
2480     if(!shader){
2481         ERR("NULL shader structure, aborting\n");
2482         return E_FAIL;
2483     }
2484     writer = create_writer(shader->version, dxversion);
2485     *result = NULL;
2486 
2487     if(!writer) {
2488         WARN("Could not create a bytecode writer instance. Either unsupported version\n");
2489         WARN("or out of memory\n");
2490         hr = E_FAIL;
2491         goto error;
2492     }
2493 
2494     buffer = allocate_buffer();
2495     if(!buffer) {
2496         WARN("Failed to allocate a buffer for the shader bytecode\n");
2497         hr = E_FAIL;
2498         goto error;
2499     }
2500 
2501     /* Write shader type and version */
2502     put_dword(buffer, shader->version);
2503 
2504     writer->funcs->header(writer, shader, buffer);
2505     if(FAILED(writer->state)) {
2506         hr = writer->state;
2507         goto error;
2508     }
2509 
2510     for(i = 0; i < shader->num_instrs; i++) {
2511         hr = call_instr_handler(writer, shader->instr[i], buffer);
2512         if(FAILED(hr)) {
2513             goto error;
2514         }
2515     }
2516 
2517     if(FAILED(writer->state)) {
2518         hr = writer->state;
2519         goto error;
2520     }
2521 
2522     writer->funcs->end(writer, shader, buffer);
2523 
2524     if(FAILED(buffer->state)) {
2525         hr = buffer->state;
2526         goto error;
2527     }
2528 
2529     *size = buffer->size * sizeof(DWORD);
2530     *result = buffer->data;
2531     buffer->data = NULL;
2532     hr = S_OK;
2533 
2534 error:
2535     if(buffer) {
2536         d3dcompiler_free(buffer->data);
2537         d3dcompiler_free(buffer);
2538     }
2539     d3dcompiler_free(writer);
2540     return hr;
2541 }
2542 
2543 void SlDeleteShader(struct bwriter_shader *shader) {
2544     unsigned int i, j;
2545 
2546     TRACE("Deleting shader %p\n", shader);
2547 
2548     for(i = 0; i < shader->num_cf; i++) {
2549         d3dcompiler_free(shader->constF[i]);
2550     }
2551     d3dcompiler_free(shader->constF);
2552     for(i = 0; i < shader->num_ci; i++) {
2553         d3dcompiler_free(shader->constI[i]);
2554     }
2555     d3dcompiler_free(shader->constI);
2556     for(i = 0; i < shader->num_cb; i++) {
2557         d3dcompiler_free(shader->constB[i]);
2558     }
2559     d3dcompiler_free(shader->constB);
2560 
2561     d3dcompiler_free(shader->inputs);
2562     d3dcompiler_free(shader->outputs);
2563     d3dcompiler_free(shader->samplers);
2564 
2565     for(i = 0; i < shader->num_instrs; i++) {
2566         for(j = 0; j < shader->instr[i]->num_srcs; j++) {
2567             d3dcompiler_free(shader->instr[i]->src[j].rel_reg);
2568         }
2569         d3dcompiler_free(shader->instr[i]->src);
2570         d3dcompiler_free(shader->instr[i]->dst.rel_reg);
2571         d3dcompiler_free(shader->instr[i]);
2572     }
2573     d3dcompiler_free(shader->instr);
2574 
2575     d3dcompiler_free(shader);
2576 }
2577