1 /*
2  * Direct3D bytecode output functions
3  *
4  * Copyright 2008 Stefan Dösinger
5  * Copyright 2009 Matteo Bruni
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20  *
21  */
22 
23 #include "config.h"
24 #include "wine/port.h"
25 #include "wine/debug.h"
26 
27 #include "d3d9types.h"
28 #include "d3dcompiler_private.h"
29 
30 WINE_DEFAULT_DEBUG_CHANNEL(bytecodewriter);
31 
32 /****************************************************************
33  * General assembler shader construction helper routines follow *
34  ****************************************************************/
35 /* struct instruction *alloc_instr
36  *
37  * Allocates a new instruction structure with srcs registers
38  *
39  * Parameters:
40  *  srcs: Number of source registers to allocate
41  *
42  * Returns:
43  *  A pointer to the allocated instruction structure
44  *  NULL in case of an allocation failure
45  */
46 struct instruction *alloc_instr(unsigned int srcs) {
47     struct instruction *ret = d3dcompiler_alloc(sizeof(*ret));
48     if(!ret) {
49         ERR("Failed to allocate memory for an instruction structure\n");
50         return NULL;
51     }
52 
53     if(srcs) {
54         ret->src = d3dcompiler_alloc(srcs * sizeof(*ret->src));
55         if(!ret->src) {
56             ERR("Failed to allocate memory for instruction registers\n");
57             d3dcompiler_free(ret);
58             return NULL;
59         }
60         ret->num_srcs = srcs;
61     }
62     return ret;
63 }
64 
65 /* void add_instruction
66  *
67  * Adds a new instruction to the shader's instructions array and grows the instruction array
68  * if needed.
69  *
70  * The function does NOT copy the instruction structure. Make sure not to release the
71  * instruction or any of its substructures like registers.
72  *
73  * Parameters:
74  *  shader: Shader to add the instruction to
75  *  instr: Instruction to add to the shader
76  */
77 BOOL add_instruction(struct bwriter_shader *shader, struct instruction *instr) {
78     struct instruction      **new_instructions;
79 
80     if(!shader) return FALSE;
81 
82     if(shader->instr_alloc_size == 0) {
83         shader->instr = d3dcompiler_alloc(sizeof(*shader->instr) * INSTRARRAY_INITIAL_SIZE);
84         if(!shader->instr) {
85             ERR("Failed to allocate the shader instruction array\n");
86             return FALSE;
87         }
88         shader->instr_alloc_size = INSTRARRAY_INITIAL_SIZE;
89     } else if(shader->instr_alloc_size == shader->num_instrs) {
90         new_instructions = d3dcompiler_realloc(shader->instr,
91                                        sizeof(*shader->instr) * (shader->instr_alloc_size) * 2);
92         if(!new_instructions) {
93             ERR("Failed to grow the shader instruction array\n");
94             return FALSE;
95         }
96         shader->instr = new_instructions;
97         shader->instr_alloc_size = shader->instr_alloc_size * 2;
98     } else if(shader->num_instrs > shader->instr_alloc_size) {
99         ERR("More instructions than allocated. This should not happen\n");
100         return FALSE;
101     }
102 
103     shader->instr[shader->num_instrs] = instr;
104     shader->num_instrs++;
105     return TRUE;
106 }
107 
108 BOOL add_constF(struct bwriter_shader *shader, DWORD reg, float x, float y, float z, float w) {
109     struct constant *newconst;
110 
111     if(shader->num_cf) {
112         struct constant **newarray;
113         newarray = d3dcompiler_realloc(shader->constF,
114                                sizeof(*shader->constF) * (shader->num_cf + 1));
115         if(!newarray) {
116             ERR("Failed to grow the constants array\n");
117             return FALSE;
118         }
119         shader->constF = newarray;
120     } else {
121         shader->constF = d3dcompiler_alloc(sizeof(*shader->constF));
122         if(!shader->constF) {
123             ERR("Failed to allocate the constants array\n");
124             return FALSE;
125         }
126     }
127 
128     newconst = d3dcompiler_alloc(sizeof(*newconst));
129     if(!newconst) {
130         ERR("Failed to allocate a new constant\n");
131         return FALSE;
132     }
133     newconst->regnum = reg;
134     newconst->value[0].f = x;
135     newconst->value[1].f = y;
136     newconst->value[2].f = z;
137     newconst->value[3].f = w;
138     shader->constF[shader->num_cf] = newconst;
139 
140     shader->num_cf++;
141     return TRUE;
142 }
143 
144 BOOL add_constI(struct bwriter_shader *shader, DWORD reg, INT x, INT y, INT z, INT w) {
145     struct constant *newconst;
146 
147     if(shader->num_ci) {
148         struct constant **newarray;
149         newarray = d3dcompiler_realloc(shader->constI,
150                                sizeof(*shader->constI) * (shader->num_ci + 1));
151         if(!newarray) {
152             ERR("Failed to grow the constants array\n");
153             return FALSE;
154         }
155         shader->constI = newarray;
156     } else {
157         shader->constI = d3dcompiler_alloc(sizeof(*shader->constI));
158         if(!shader->constI) {
159             ERR("Failed to allocate the constants array\n");
160             return FALSE;
161         }
162     }
163 
164     newconst = d3dcompiler_alloc(sizeof(*newconst));
165     if(!newconst) {
166         ERR("Failed to allocate a new constant\n");
167         return FALSE;
168     }
169     newconst->regnum = reg;
170     newconst->value[0].i = x;
171     newconst->value[1].i = y;
172     newconst->value[2].i = z;
173     newconst->value[3].i = w;
174     shader->constI[shader->num_ci] = newconst;
175 
176     shader->num_ci++;
177     return TRUE;
178 }
179 
180 BOOL add_constB(struct bwriter_shader *shader, DWORD reg, BOOL x) {
181     struct constant *newconst;
182 
183     if(shader->num_cb) {
184         struct constant **newarray;
185         newarray = d3dcompiler_realloc(shader->constB,
186                                sizeof(*shader->constB) * (shader->num_cb + 1));
187         if(!newarray) {
188             ERR("Failed to grow the constants array\n");
189             return FALSE;
190         }
191         shader->constB = newarray;
192     } else {
193         shader->constB = d3dcompiler_alloc(sizeof(*shader->constB));
194         if(!shader->constB) {
195             ERR("Failed to allocate the constants array\n");
196             return FALSE;
197         }
198     }
199 
200     newconst = d3dcompiler_alloc(sizeof(*newconst));
201     if(!newconst) {
202         ERR("Failed to allocate a new constant\n");
203         return FALSE;
204     }
205     newconst->regnum = reg;
206     newconst->value[0].b = x;
207     shader->constB[shader->num_cb] = newconst;
208 
209     shader->num_cb++;
210     return TRUE;
211 }
212 
213 BOOL record_declaration(struct bwriter_shader *shader, DWORD usage,
214                         DWORD usage_idx, DWORD mod, BOOL output,
215                         DWORD regnum, DWORD writemask, BOOL builtin) {
216     unsigned int *num;
217     struct declaration **decl;
218     unsigned int i;
219 
220     if(!shader) return FALSE;
221 
222     if(output) {
223         num = &shader->num_outputs;
224         decl = &shader->outputs;
225     } else {
226         num = &shader->num_inputs;
227         decl = &shader->inputs;
228     }
229 
230     if(*num == 0) {
231         *decl = d3dcompiler_alloc(sizeof(**decl));
232         if(!*decl) {
233             ERR("Error allocating declarations array\n");
234             return FALSE;
235         }
236     } else {
237         struct declaration *newdecl;
238         for(i = 0; i < *num; i++) {
239             if((*decl)[i].regnum == regnum && ((*decl)[i].writemask & writemask)) {
240                 WARN("Declaration of register %u already exists, writemask match 0x%x\n",
241                       regnum, (*decl)[i].writemask & writemask);
242             }
243         }
244 
245         newdecl = d3dcompiler_realloc(*decl,
246                               sizeof(**decl) * ((*num) + 1));
247         if(!newdecl) {
248             ERR("Error reallocating declarations array\n");
249             return FALSE;
250         }
251         *decl = newdecl;
252     }
253     (*decl)[*num].usage = usage;
254     (*decl)[*num].usage_idx = usage_idx;
255     (*decl)[*num].regnum = regnum;
256     (*decl)[*num].mod = mod;
257     (*decl)[*num].writemask = writemask;
258     (*decl)[*num].builtin = builtin;
259     (*num)++;
260 
261     return TRUE;
262 }
263 
264 BOOL record_sampler(struct bwriter_shader *shader, DWORD samptype, DWORD mod, DWORD regnum) {
265     unsigned int i;
266 
267     if(!shader) return FALSE;
268 
269     if(shader->num_samplers == 0) {
270         shader->samplers = d3dcompiler_alloc(sizeof(*shader->samplers));
271         if(!shader->samplers) {
272             ERR("Error allocating samplers array\n");
273             return FALSE;
274         }
275     } else {
276         struct samplerdecl *newarray;
277 
278         for(i = 0; i < shader->num_samplers; i++) {
279             if(shader->samplers[i].regnum == regnum) {
280                 WARN("Sampler %u already declared\n", regnum);
281                 /* This is not an error as far as the assembler is concerned.
282                  * Direct3D might refuse to load the compiled shader though
283                  */
284             }
285         }
286 
287         newarray = d3dcompiler_realloc(shader->samplers,
288                                sizeof(*shader->samplers) * (shader->num_samplers + 1));
289         if(!newarray) {
290             ERR("Error reallocating samplers array\n");
291             return FALSE;
292         }
293         shader->samplers = newarray;
294     }
295 
296     shader->samplers[shader->num_samplers].type = samptype;
297     shader->samplers[shader->num_samplers].mod = mod;
298     shader->samplers[shader->num_samplers].regnum = regnum;
299     shader->num_samplers++;
300     return TRUE;
301 }
302 
303 
304 /* shader bytecode buffer manipulation functions.
305  * allocate_buffer creates a new buffer structure, put_dword adds a new
306  * DWORD to the buffer. In the rare case of a memory allocation failure
307  * when trying to grow the buffer a flag is set in the buffer to mark it
308  * invalid. This avoids return value checking and passing in many places
309  */
310 static struct bytecode_buffer *allocate_buffer(void) {
311     struct bytecode_buffer *ret;
312 
313     ret = d3dcompiler_alloc(sizeof(*ret));
314     if(!ret) return NULL;
315 
316     ret->alloc_size = BYTECODEBUFFER_INITIAL_SIZE;
317     ret->data = d3dcompiler_alloc(sizeof(DWORD) * ret->alloc_size);
318     if(!ret->data) {
319         d3dcompiler_free(ret);
320         return NULL;
321     }
322     ret->state = S_OK;
323     return ret;
324 }
325 
326 static void put_dword(struct bytecode_buffer *buffer, DWORD value) {
327     if(FAILED(buffer->state)) return;
328 
329     if(buffer->alloc_size == buffer->size) {
330         DWORD *newarray;
331         buffer->alloc_size *= 2;
332         newarray = d3dcompiler_realloc(buffer->data,
333                                sizeof(DWORD) * buffer->alloc_size);
334         if(!newarray) {
335             ERR("Failed to grow the buffer data memory\n");
336             buffer->state = E_OUTOFMEMORY;
337             return;
338         }
339         buffer->data = newarray;
340     }
341     buffer->data[buffer->size++] = value;
342 }
343 
344 /* bwriter -> d3d9 conversion functions. */
345 static DWORD d3d9_swizzle(DWORD bwriter_swizzle)
346 {
347     /* Currently a NOP, but this allows changing the internal definitions
348      * without side effects. */
349     DWORD ret = 0;
350 
351     if ((bwriter_swizzle & BWRITERVS_X_X) == BWRITERVS_X_X) ret |= D3DVS_X_X;
352     if ((bwriter_swizzle & BWRITERVS_X_Y) == BWRITERVS_X_Y) ret |= D3DVS_X_Y;
353     if ((bwriter_swizzle & BWRITERVS_X_Z) == BWRITERVS_X_Z) ret |= D3DVS_X_Z;
354     if ((bwriter_swizzle & BWRITERVS_X_W) == BWRITERVS_X_W) ret |= D3DVS_X_W;
355 
356     if ((bwriter_swizzle & BWRITERVS_Y_X) == BWRITERVS_Y_X) ret |= D3DVS_Y_X;
357     if ((bwriter_swizzle & BWRITERVS_Y_Y) == BWRITERVS_Y_Y) ret |= D3DVS_Y_Y;
358     if ((bwriter_swizzle & BWRITERVS_Y_Z) == BWRITERVS_Y_Z) ret |= D3DVS_Y_Z;
359     if ((bwriter_swizzle & BWRITERVS_Y_W) == BWRITERVS_Y_W) ret |= D3DVS_Y_W;
360 
361     if ((bwriter_swizzle & BWRITERVS_Z_X) == BWRITERVS_Z_X) ret |= D3DVS_Z_X;
362     if ((bwriter_swizzle & BWRITERVS_Z_Y) == BWRITERVS_Z_Y) ret |= D3DVS_Z_Y;
363     if ((bwriter_swizzle & BWRITERVS_Z_Z) == BWRITERVS_Z_Z) ret |= D3DVS_Z_Z;
364     if ((bwriter_swizzle & BWRITERVS_Z_W) == BWRITERVS_Z_W) ret |= D3DVS_Z_W;
365 
366     if ((bwriter_swizzle & BWRITERVS_W_X) == BWRITERVS_W_X) ret |= D3DVS_W_X;
367     if ((bwriter_swizzle & BWRITERVS_W_Y) == BWRITERVS_W_Y) ret |= D3DVS_W_Y;
368     if ((bwriter_swizzle & BWRITERVS_W_Z) == BWRITERVS_W_Z) ret |= D3DVS_W_Z;
369     if ((bwriter_swizzle & BWRITERVS_W_W) == BWRITERVS_W_W) ret |= D3DVS_W_W;
370 
371     return ret;
372 }
373 
374 static DWORD d3d9_writemask(DWORD bwriter_writemask)
375 {
376     DWORD ret = 0;
377 
378     if (bwriter_writemask & BWRITERSP_WRITEMASK_0) ret |= D3DSP_WRITEMASK_0;
379     if (bwriter_writemask & BWRITERSP_WRITEMASK_1) ret |= D3DSP_WRITEMASK_1;
380     if (bwriter_writemask & BWRITERSP_WRITEMASK_2) ret |= D3DSP_WRITEMASK_2;
381     if (bwriter_writemask & BWRITERSP_WRITEMASK_3) ret |= D3DSP_WRITEMASK_3;
382 
383     return ret;
384 }
385 
386 static DWORD d3d9_srcmod(DWORD bwriter_srcmod)
387 {
388     switch (bwriter_srcmod)
389     {
390         case BWRITERSPSM_NONE:       return D3DSPSM_NONE;
391         case BWRITERSPSM_NEG:        return D3DSPSM_NEG;
392         case BWRITERSPSM_BIAS:       return D3DSPSM_BIAS;
393         case BWRITERSPSM_BIASNEG:    return D3DSPSM_BIASNEG;
394         case BWRITERSPSM_SIGN:       return D3DSPSM_SIGN;
395         case BWRITERSPSM_SIGNNEG:    return D3DSPSM_SIGNNEG;
396         case BWRITERSPSM_COMP:       return D3DSPSM_COMP;
397         case BWRITERSPSM_X2:         return D3DSPSM_X2;
398         case BWRITERSPSM_X2NEG:      return D3DSPSM_X2NEG;
399         case BWRITERSPSM_DZ:         return D3DSPSM_DZ;
400         case BWRITERSPSM_DW:         return D3DSPSM_DW;
401         case BWRITERSPSM_ABS:        return D3DSPSM_ABS;
402         case BWRITERSPSM_ABSNEG:     return D3DSPSM_ABSNEG;
403         case BWRITERSPSM_NOT:        return D3DSPSM_NOT;
404         default:
405             FIXME("Unhandled BWRITERSPSM token %#x.\n", bwriter_srcmod);
406             return 0;
407     }
408 }
409 
410 static DWORD d3d9_dstmod(DWORD bwriter_mod)
411 {
412     DWORD ret = 0;
413 
414     if (bwriter_mod & BWRITERSPDM_SATURATE)         ret |= D3DSPDM_SATURATE;
415     if (bwriter_mod & BWRITERSPDM_PARTIALPRECISION) ret |= D3DSPDM_PARTIALPRECISION;
416     if (bwriter_mod & BWRITERSPDM_MSAMPCENTROID)    ret |= D3DSPDM_MSAMPCENTROID;
417 
418     return ret;
419 }
420 
421 static DWORD d3d9_comparetype(DWORD asmshader_comparetype)
422 {
423     switch (asmshader_comparetype)
424     {
425         case BWRITER_COMPARISON_GT:     return D3DSPC_GT;
426         case BWRITER_COMPARISON_EQ:     return D3DSPC_EQ;
427         case BWRITER_COMPARISON_GE:     return D3DSPC_GE;
428         case BWRITER_COMPARISON_LT:     return D3DSPC_LT;
429         case BWRITER_COMPARISON_NE:     return D3DSPC_NE;
430         case BWRITER_COMPARISON_LE:     return D3DSPC_LE;
431         default:
432             FIXME("Unexpected BWRITER_COMPARISON type %#x.\n", asmshader_comparetype);
433             return 0;
434     }
435 }
436 
437 static DWORD d3d9_sampler(DWORD bwriter_sampler)
438 {
439     if (bwriter_sampler == BWRITERSTT_UNKNOWN)  return D3DSTT_UNKNOWN;
440     if (bwriter_sampler == BWRITERSTT_1D)       return D3DSTT_1D;
441     if (bwriter_sampler == BWRITERSTT_2D)       return D3DSTT_2D;
442     if (bwriter_sampler == BWRITERSTT_CUBE)     return D3DSTT_CUBE;
443     if (bwriter_sampler == BWRITERSTT_VOLUME)   return D3DSTT_VOLUME;
444     FIXME("Unexpected BWRITERSAMPLER_TEXTURE_TYPE type %#x.\n", bwriter_sampler);
445 
446     return 0;
447 }
448 
449 static DWORD d3d9_register(DWORD bwriter_register)
450 {
451     if (bwriter_register == BWRITERSPR_TEMP)        return D3DSPR_TEMP;
452     if (bwriter_register == BWRITERSPR_INPUT)       return D3DSPR_INPUT;
453     if (bwriter_register == BWRITERSPR_CONST)       return D3DSPR_CONST;
454     if (bwriter_register == BWRITERSPR_ADDR)        return D3DSPR_ADDR;
455     if (bwriter_register == BWRITERSPR_TEXTURE)     return D3DSPR_TEXTURE;
456     if (bwriter_register == BWRITERSPR_RASTOUT)     return D3DSPR_RASTOUT;
457     if (bwriter_register == BWRITERSPR_ATTROUT)     return D3DSPR_ATTROUT;
458     if (bwriter_register == BWRITERSPR_TEXCRDOUT)   return D3DSPR_TEXCRDOUT;
459     if (bwriter_register == BWRITERSPR_OUTPUT)      return D3DSPR_OUTPUT;
460     if (bwriter_register == BWRITERSPR_CONSTINT)    return D3DSPR_CONSTINT;
461     if (bwriter_register == BWRITERSPR_COLOROUT)    return D3DSPR_COLOROUT;
462     if (bwriter_register == BWRITERSPR_DEPTHOUT)    return D3DSPR_DEPTHOUT;
463     if (bwriter_register == BWRITERSPR_SAMPLER)     return D3DSPR_SAMPLER;
464     if (bwriter_register == BWRITERSPR_CONSTBOOL)   return D3DSPR_CONSTBOOL;
465     if (bwriter_register == BWRITERSPR_LOOP)        return D3DSPR_LOOP;
466     if (bwriter_register == BWRITERSPR_MISCTYPE)    return D3DSPR_MISCTYPE;
467     if (bwriter_register == BWRITERSPR_LABEL)       return D3DSPR_LABEL;
468     if (bwriter_register == BWRITERSPR_PREDICATE)   return D3DSPR_PREDICATE;
469 
470     FIXME("Unexpected BWRITERSPR %#x.\n", bwriter_register);
471     return ~0U;
472 }
473 
474 static DWORD d3d9_opcode(DWORD bwriter_opcode)
475 {
476     switch (bwriter_opcode)
477     {
478         case BWRITERSIO_NOP:         return D3DSIO_NOP;
479         case BWRITERSIO_MOV:         return D3DSIO_MOV;
480         case BWRITERSIO_ADD:         return D3DSIO_ADD;
481         case BWRITERSIO_SUB:         return D3DSIO_SUB;
482         case BWRITERSIO_MAD:         return D3DSIO_MAD;
483         case BWRITERSIO_MUL:         return D3DSIO_MUL;
484         case BWRITERSIO_RCP:         return D3DSIO_RCP;
485         case BWRITERSIO_RSQ:         return D3DSIO_RSQ;
486         case BWRITERSIO_DP3:         return D3DSIO_DP3;
487         case BWRITERSIO_DP4:         return D3DSIO_DP4;
488         case BWRITERSIO_MIN:         return D3DSIO_MIN;
489         case BWRITERSIO_MAX:         return D3DSIO_MAX;
490         case BWRITERSIO_SLT:         return D3DSIO_SLT;
491         case BWRITERSIO_SGE:         return D3DSIO_SGE;
492         case BWRITERSIO_EXP:         return D3DSIO_EXP;
493         case BWRITERSIO_LOG:         return D3DSIO_LOG;
494         case BWRITERSIO_LIT:         return D3DSIO_LIT;
495         case BWRITERSIO_DST:         return D3DSIO_DST;
496         case BWRITERSIO_LRP:         return D3DSIO_LRP;
497         case BWRITERSIO_FRC:         return D3DSIO_FRC;
498         case BWRITERSIO_M4x4:        return D3DSIO_M4x4;
499         case BWRITERSIO_M4x3:        return D3DSIO_M4x3;
500         case BWRITERSIO_M3x4:        return D3DSIO_M3x4;
501         case BWRITERSIO_M3x3:        return D3DSIO_M3x3;
502         case BWRITERSIO_M3x2:        return D3DSIO_M3x2;
503         case BWRITERSIO_CALL:        return D3DSIO_CALL;
504         case BWRITERSIO_CALLNZ:      return D3DSIO_CALLNZ;
505         case BWRITERSIO_LOOP:        return D3DSIO_LOOP;
506         case BWRITERSIO_RET:         return D3DSIO_RET;
507         case BWRITERSIO_ENDLOOP:     return D3DSIO_ENDLOOP;
508         case BWRITERSIO_LABEL:       return D3DSIO_LABEL;
509         case BWRITERSIO_DCL:         return D3DSIO_DCL;
510         case BWRITERSIO_POW:         return D3DSIO_POW;
511         case BWRITERSIO_CRS:         return D3DSIO_CRS;
512         case BWRITERSIO_SGN:         return D3DSIO_SGN;
513         case BWRITERSIO_ABS:         return D3DSIO_ABS;
514         case BWRITERSIO_NRM:         return D3DSIO_NRM;
515         case BWRITERSIO_SINCOS:      return D3DSIO_SINCOS;
516         case BWRITERSIO_REP:         return D3DSIO_REP;
517         case BWRITERSIO_ENDREP:      return D3DSIO_ENDREP;
518         case BWRITERSIO_IF:          return D3DSIO_IF;
519         case BWRITERSIO_IFC:         return D3DSIO_IFC;
520         case BWRITERSIO_ELSE:        return D3DSIO_ELSE;
521         case BWRITERSIO_ENDIF:       return D3DSIO_ENDIF;
522         case BWRITERSIO_BREAK:       return D3DSIO_BREAK;
523         case BWRITERSIO_BREAKC:      return D3DSIO_BREAKC;
524         case BWRITERSIO_MOVA:        return D3DSIO_MOVA;
525         case BWRITERSIO_DEFB:        return D3DSIO_DEFB;
526         case BWRITERSIO_DEFI:        return D3DSIO_DEFI;
527 
528         case BWRITERSIO_TEXCOORD:    return D3DSIO_TEXCOORD;
529         case BWRITERSIO_TEXKILL:     return D3DSIO_TEXKILL;
530         case BWRITERSIO_TEX:         return D3DSIO_TEX;
531         case BWRITERSIO_TEXBEM:      return D3DSIO_TEXBEM;
532         case BWRITERSIO_TEXBEML:     return D3DSIO_TEXBEML;
533         case BWRITERSIO_TEXREG2AR:   return D3DSIO_TEXREG2AR;
534         case BWRITERSIO_TEXREG2GB:   return D3DSIO_TEXREG2GB;
535         case BWRITERSIO_TEXM3x2PAD:  return D3DSIO_TEXM3x2PAD;
536         case BWRITERSIO_TEXM3x2TEX:  return D3DSIO_TEXM3x2TEX;
537         case BWRITERSIO_TEXM3x3PAD:  return D3DSIO_TEXM3x3PAD;
538         case BWRITERSIO_TEXM3x3TEX:  return D3DSIO_TEXM3x3TEX;
539         case BWRITERSIO_TEXM3x3SPEC: return D3DSIO_TEXM3x3SPEC;
540         case BWRITERSIO_TEXM3x3VSPEC:return D3DSIO_TEXM3x3VSPEC;
541         case BWRITERSIO_EXPP:        return D3DSIO_EXPP;
542         case BWRITERSIO_LOGP:        return D3DSIO_LOGP;
543         case BWRITERSIO_CND:         return D3DSIO_CND;
544         case BWRITERSIO_DEF:         return D3DSIO_DEF;
545         case BWRITERSIO_TEXREG2RGB:  return D3DSIO_TEXREG2RGB;
546         case BWRITERSIO_TEXDP3TEX:   return D3DSIO_TEXDP3TEX;
547         case BWRITERSIO_TEXM3x2DEPTH:return D3DSIO_TEXM3x2DEPTH;
548         case BWRITERSIO_TEXDP3:      return D3DSIO_TEXDP3;
549         case BWRITERSIO_TEXM3x3:     return D3DSIO_TEXM3x3;
550         case BWRITERSIO_TEXDEPTH:    return D3DSIO_TEXDEPTH;
551         case BWRITERSIO_CMP:         return D3DSIO_CMP;
552         case BWRITERSIO_BEM:         return D3DSIO_BEM;
553         case BWRITERSIO_DP2ADD:      return D3DSIO_DP2ADD;
554         case BWRITERSIO_DSX:         return D3DSIO_DSX;
555         case BWRITERSIO_DSY:         return D3DSIO_DSY;
556         case BWRITERSIO_TEXLDD:      return D3DSIO_TEXLDD;
557         case BWRITERSIO_SETP:        return D3DSIO_SETP;
558         case BWRITERSIO_TEXLDL:      return D3DSIO_TEXLDL;
559         case BWRITERSIO_BREAKP:      return D3DSIO_BREAKP;
560 
561         case BWRITERSIO_PHASE:       return D3DSIO_PHASE;
562         case BWRITERSIO_COMMENT:     return D3DSIO_COMMENT;
563         case BWRITERSIO_END:         return D3DSIO_END;
564 
565         case BWRITERSIO_TEXLDP:      return D3DSIO_TEX | D3DSI_TEXLD_PROJECT;
566         case BWRITERSIO_TEXLDB:      return D3DSIO_TEX | D3DSI_TEXLD_BIAS;
567 
568         default:
569             FIXME("Unhandled BWRITERSIO token %#x.\n", bwriter_opcode);
570             return ~0U;
571     }
572 }
573 
574 static DWORD d3dsp_register( D3DSHADER_PARAM_REGISTER_TYPE type, DWORD num )
575 {
576     return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
577            ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
578            (num & D3DSP_REGNUM_MASK); /* No shift */
579 }
580 
581 /******************************************************
582  * Implementation of the writer functions starts here *
583  ******************************************************/
584 static void write_declarations(struct bc_writer *This,
585                                struct bytecode_buffer *buffer, BOOL len,
586                                const struct declaration *decls, unsigned int num, DWORD type) {
587     DWORD i;
588     DWORD instr_dcl = D3DSIO_DCL;
589     DWORD token;
590     struct shader_reg reg;
591 
592     ZeroMemory(&reg, sizeof(reg));
593 
594     if(len) {
595         instr_dcl |= 2 << D3DSI_INSTLENGTH_SHIFT;
596     }
597 
598     for(i = 0; i < num; i++) {
599         if(decls[i].builtin) continue;
600 
601         /* Write the DCL instruction */
602         put_dword(buffer, instr_dcl);
603 
604         /* Write the usage and index */
605         token = (1u << 31); /* Bit 31 of non-instruction opcodes is 1 */
606         token |= (decls[i].usage << D3DSP_DCL_USAGE_SHIFT) & D3DSP_DCL_USAGE_MASK;
607         token |= (decls[i].usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT) & D3DSP_DCL_USAGEINDEX_MASK;
608         put_dword(buffer, token);
609 
610         /* Write the dest register */
611         reg.type = type;
612         reg.regnum = decls[i].regnum;
613         reg.u.writemask = decls[i].writemask;
614         This->funcs->dstreg(This, &reg, buffer, 0, decls[i].mod);
615     }
616 }
617 
618 static void write_const(struct constant **consts, int num, DWORD opcode, DWORD reg_type, struct bytecode_buffer *buffer, BOOL len) {
619     int i;
620     DWORD instr_def = opcode;
621     const DWORD reg = (1u << 31) | d3dsp_register( reg_type, 0 ) | D3DSP_WRITEMASK_ALL;
622 
623     if(len) {
624         if(opcode == D3DSIO_DEFB)
625             instr_def |= 2 << D3DSI_INSTLENGTH_SHIFT;
626         else
627             instr_def |= 5 << D3DSI_INSTLENGTH_SHIFT;
628     }
629 
630     for(i = 0; i < num; i++) {
631         /* Write the DEF instruction */
632         put_dword(buffer, instr_def);
633 
634         put_dword(buffer, reg | (consts[i]->regnum & D3DSP_REGNUM_MASK));
635         put_dword(buffer, consts[i]->value[0].d);
636         if(opcode != D3DSIO_DEFB) {
637             put_dword(buffer, consts[i]->value[1].d);
638             put_dword(buffer, consts[i]->value[2].d);
639             put_dword(buffer, consts[i]->value[3].d);
640         }
641     }
642 }
643 
644 static void write_constF(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
645     write_const(shader->constF, shader->num_cf, D3DSIO_DEF, D3DSPR_CONST, buffer, len);
646 }
647 
648 /* This function looks for VS 1/2 registers mapping to VS 3 output registers */
649 static HRESULT vs_find_builtin_varyings(struct bc_writer *This, const struct bwriter_shader *shader) {
650     DWORD i;
651     DWORD usage, usage_idx, writemask, regnum;
652 
653     for(i = 0; i < shader->num_outputs; i++) {
654         if(!shader->outputs[i].builtin) continue;
655 
656         usage = shader->outputs[i].usage;
657         usage_idx = shader->outputs[i].usage_idx;
658         writemask = shader->outputs[i].writemask;
659         regnum = shader->outputs[i].regnum;
660 
661         switch(usage) {
662             case BWRITERDECLUSAGE_POSITION:
663             case BWRITERDECLUSAGE_POSITIONT:
664                 if(usage_idx > 0) {
665                     WARN("dcl_position%u not supported in sm 1/2 shaders\n", usage_idx);
666                     return E_INVALIDARG;
667                 }
668                 TRACE("o%u is oPos\n", regnum);
669                 This->oPos_regnum = regnum;
670                 break;
671 
672             case BWRITERDECLUSAGE_COLOR:
673                 if(usage_idx > 1) {
674                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
675                     return E_INVALIDARG;
676                 }
677                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
678                     WARN("Only WRITEMASK_ALL is supported on color in sm 1/2\n");
679                     return E_INVALIDARG;
680                 }
681                 TRACE("o%u is oD%u\n", regnum, usage_idx);
682                 This->oD_regnum[usage_idx] = regnum;
683                 break;
684 
685             case BWRITERDECLUSAGE_TEXCOORD:
686                 if(usage_idx >= 8) {
687                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
688                     return E_INVALIDARG;
689                 }
690                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
691                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
692                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
693                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
694                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
695                     return E_INVALIDARG;
696                 }
697                 TRACE("o%u is oT%u\n", regnum, usage_idx);
698                 This->oT_regnum[usage_idx] = regnum;
699                 break;
700 
701             case BWRITERDECLUSAGE_PSIZE:
702                 if(usage_idx > 0) {
703                     WARN("dcl_psize%u not supported in sm 1/2 shaders\n", usage_idx);
704                     return E_INVALIDARG;
705                 }
706                 TRACE("o%u writemask 0x%08x is oPts\n", regnum, writemask);
707                 This->oPts_regnum = regnum;
708                 This->oPts_mask = writemask;
709                 break;
710 
711             case BWRITERDECLUSAGE_FOG:
712                 if(usage_idx > 0) {
713                     WARN("dcl_fog%u not supported in sm 1 shaders\n", usage_idx);
714                     return E_INVALIDARG;
715                 }
716                 if(writemask != BWRITERSP_WRITEMASK_0 && writemask != BWRITERSP_WRITEMASK_1 &&
717                    writemask != BWRITERSP_WRITEMASK_2 && writemask != BWRITERSP_WRITEMASK_3) {
718                     WARN("Unsupported fog writemask\n");
719                     return E_INVALIDARG;
720                 }
721                 TRACE("o%u writemask 0x%08x is oFog\n", regnum, writemask);
722                 This->oFog_regnum = regnum;
723                 This->oFog_mask = writemask;
724                 break;
725 
726             default:
727                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
728                 return E_INVALIDARG;
729         }
730     }
731 
732     return S_OK;
733 }
734 
735 static void vs_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
736     HRESULT hr;
737 
738     if(shader->num_ci || shader->num_cb) {
739         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
740         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
741         This->state = E_INVALIDARG;
742         return;
743     }
744 
745     hr = vs_find_builtin_varyings(This, shader);
746     if(FAILED(hr)) {
747         This->state = hr;
748         return;
749     }
750 
751     write_declarations(This, buffer, FALSE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
752     write_constF(shader, buffer, FALSE);
753 }
754 
755 static HRESULT find_ps_builtin_semantics(struct bc_writer *This,
756                                          const struct bwriter_shader *shader,
757                                          DWORD texcoords) {
758     DWORD i;
759     DWORD usage, usage_idx, writemask, regnum;
760 
761     This->v_regnum[0] = -1; This->v_regnum[1] = -1;
762     for(i = 0; i < 8; i++) This->t_regnum[i] = -1;
763 
764     for(i = 0; i < shader->num_inputs; i++) {
765         if(!shader->inputs[i].builtin) continue;
766 
767         usage = shader->inputs[i].usage;
768         usage_idx = shader->inputs[i].usage_idx;
769         writemask = shader->inputs[i].writemask;
770         regnum = shader->inputs[i].regnum;
771 
772         switch(usage) {
773             case BWRITERDECLUSAGE_COLOR:
774                 if(usage_idx > 1) {
775                     WARN("dcl_color%u not supported in sm 1 shaders\n", usage_idx);
776                     return E_INVALIDARG;
777                 }
778                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
779                     WARN("Only WRITEMASK_ALL is supported on color in sm 1\n");
780                     return E_INVALIDARG;
781                 }
782                 TRACE("v%u is v%u\n", regnum, usage_idx);
783                 This->v_regnum[usage_idx] = regnum;
784                 break;
785 
786             case BWRITERDECLUSAGE_TEXCOORD:
787                 if(usage_idx > texcoords) {
788                     WARN("dcl_texcoord%u not supported in this shader version\n", usage_idx);
789                     return E_INVALIDARG;
790                 }
791                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
792                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
793                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
794                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
795                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
796                 } else {
797                     writemask = BWRITERSP_WRITEMASK_ALL;
798                 }
799                 TRACE("v%u is t%u\n", regnum, usage_idx);
800                 This->t_regnum[usage_idx] = regnum;
801                 break;
802 
803             default:
804                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
805                 return E_INVALIDARG;
806         }
807     }
808 
809     return S_OK;
810 }
811 
812 static void ps_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
813     HRESULT hr;
814 
815     /* First check the constants and varyings, and complain if unsupported things are used */
816     if(shader->num_ci || shader->num_cb) {
817         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
818         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
819         This->state = E_INVALIDARG;
820         return;
821     }
822 
823     hr = find_ps_builtin_semantics(This, shader, 4);
824     if(FAILED(hr)) {
825         This->state = hr;
826         return;
827     }
828 
829     write_constF(shader, buffer, FALSE);
830 }
831 
832 static void ps_1_4_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
833     HRESULT hr;
834 
835     /* First check the constants and varyings, and complain if unsupported things are used */
836     if(shader->num_ci || shader->num_cb) {
837         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
838         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
839         This->state = E_INVALIDARG;
840         return;
841     }
842     hr = find_ps_builtin_semantics(This, shader, 6);
843     if(FAILED(hr)) {
844         This->state = hr;
845         return;
846     }
847 
848     write_constF(shader, buffer, FALSE);
849 }
850 
851 static void end(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
852     put_dword(buffer, D3DSIO_END);
853 }
854 
855 static DWORD map_vs_output(struct bc_writer *This, DWORD regnum, DWORD mask, DWORD *has_components) {
856     DWORD i;
857 
858     *has_components = TRUE;
859     if(regnum == This->oPos_regnum) {
860         return d3dsp_register( D3DSPR_RASTOUT, D3DSRO_POSITION );
861     }
862     if(regnum == This->oFog_regnum && mask == This->oFog_mask) {
863         *has_components = FALSE;
864         return d3dsp_register( D3DSPR_RASTOUT, D3DSRO_FOG ) | D3DSP_WRITEMASK_ALL;
865     }
866     if(regnum == This->oPts_regnum && mask == This->oPts_mask) {
867         *has_components = FALSE;
868         return d3dsp_register( D3DSPR_RASTOUT, D3DSRO_POINT_SIZE ) | D3DSP_WRITEMASK_ALL;
869     }
870     for(i = 0; i < 2; i++) {
871         if(regnum == This->oD_regnum[i]) {
872             return d3dsp_register( D3DSPR_ATTROUT, i );
873         }
874     }
875     for(i = 0; i < 8; i++) {
876         if(regnum == This->oT_regnum[i]) {
877             return d3dsp_register( D3DSPR_TEXCRDOUT, i );
878         }
879     }
880 
881     /* The varying must be undeclared - if an unsupported varying was declared,
882      * the vs_find_builtin_varyings function would have caught it and this code
883      * would not run */
884     WARN("Undeclared varying %u\n", regnum);
885     This->state = E_INVALIDARG;
886     return -1;
887 }
888 
889 static void vs_12_dstreg(struct bc_writer *This, const struct shader_reg *reg,
890                          struct bytecode_buffer *buffer,
891                          DWORD shift, DWORD mod) {
892     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
893     DWORD has_wmask;
894 
895     if(reg->rel_reg) {
896         WARN("Relative addressing not supported for destination registers\n");
897         This->state = E_INVALIDARG;
898         return;
899     }
900 
901     switch(reg->type) {
902         case BWRITERSPR_OUTPUT:
903             token |= map_vs_output(This, reg->regnum, reg->u.writemask, &has_wmask);
904             break;
905 
906         case BWRITERSPR_RASTOUT:
907         case BWRITERSPR_ATTROUT:
908             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
909             * but are unexpected. If we hit this path it might be due to an error.
910             */
911             FIXME("Unexpected register type %u\n", reg->type);
912             /* drop through */
913         case BWRITERSPR_INPUT:
914         case BWRITERSPR_TEMP:
915         case BWRITERSPR_CONST:
916             token |= d3dsp_register( reg->type, reg->regnum );
917             has_wmask = TRUE;
918             break;
919 
920         case BWRITERSPR_ADDR:
921             if(reg->regnum != 0) {
922                 WARN("Only a0 exists\n");
923                 This->state = E_INVALIDARG;
924                 return;
925             }
926             token |= d3dsp_register( D3DSPR_ADDR, 0 );
927             has_wmask = TRUE;
928             break;
929 
930         case BWRITERSPR_PREDICATE:
931             if(This->version != BWRITERVS_VERSION(2, 1)){
932                 WARN("Predicate register is allowed only in vs_2_x\n");
933                 This->state = E_INVALIDARG;
934                 return;
935             }
936             if(reg->regnum != 0) {
937                 WARN("Only predicate register p0 exists\n");
938                 This->state = E_INVALIDARG;
939                 return;
940             }
941             token |= d3dsp_register( D3DSPR_PREDICATE, 0 );
942             has_wmask = TRUE;
943             break;
944 
945         default:
946             WARN("Invalid register type for 1.x-2.x vertex shader\n");
947             This->state = E_INVALIDARG;
948             return;
949     }
950 
951     /* strictly speaking there are no modifiers in vs_2_0 and vs_1_x, but they can be written
952      * into the bytecode and since the compiler doesn't do such checks write them
953      * (the checks are done by the undocumented shader validator)
954      */
955     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
956     token |= d3d9_dstmod(mod);
957 
958     if(has_wmask) {
959         token |= d3d9_writemask(reg->u.writemask);
960     }
961     put_dword(buffer, token);
962 }
963 
964 static void vs_1_x_srcreg(struct bc_writer *This, const struct shader_reg *reg,
965                           struct bytecode_buffer *buffer) {
966     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
967     DWORD has_swizzle;
968     DWORD component;
969 
970     switch(reg->type) {
971         case BWRITERSPR_OUTPUT:
972             /* Map the swizzle to a writemask, the format expected
973                by map_vs_output
974              */
975             switch(reg->u.swizzle) {
976                 case BWRITERVS_SWIZZLE_X:
977                     component = BWRITERSP_WRITEMASK_0;
978                     break;
979                 case BWRITERVS_SWIZZLE_Y:
980                     component = BWRITERSP_WRITEMASK_1;
981                     break;
982                 case BWRITERVS_SWIZZLE_Z:
983                     component = BWRITERSP_WRITEMASK_2;
984                     break;
985                 case BWRITERVS_SWIZZLE_W:
986                     component = BWRITERSP_WRITEMASK_3;
987                     break;
988                 default:
989                     component = 0;
990             }
991             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
992             break;
993 
994         case BWRITERSPR_RASTOUT:
995         case BWRITERSPR_ATTROUT:
996             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
997              * but are unexpected. If we hit this path it might be due to an error.
998              */
999             FIXME("Unexpected register type %u\n", reg->type);
1000             /* drop through */
1001         case BWRITERSPR_INPUT:
1002         case BWRITERSPR_TEMP:
1003         case BWRITERSPR_CONST:
1004         case BWRITERSPR_ADDR:
1005             token |= d3dsp_register( reg->type, reg->regnum );
1006             if(reg->rel_reg) {
1007                 if(reg->rel_reg->type != BWRITERSPR_ADDR ||
1008                    reg->rel_reg->regnum != 0 ||
1009                    reg->rel_reg->u.swizzle != BWRITERVS_SWIZZLE_X) {
1010                     WARN("Relative addressing in vs_1_x is only allowed with a0.x\n");
1011                     This->state = E_INVALIDARG;
1012                     return;
1013                 }
1014                 token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1015             }
1016             break;
1017 
1018         default:
1019             WARN("Invalid register type for 1.x vshader\n");
1020             This->state = E_INVALIDARG;
1021             return;
1022     }
1023 
1024     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1025 
1026     token |= d3d9_srcmod(reg->srcmod);
1027     put_dword(buffer, token);
1028 }
1029 
1030 static void write_srcregs(struct bc_writer *This, const struct instruction *instr,
1031                           struct bytecode_buffer *buffer){
1032     unsigned int i;
1033     if(instr->has_predicate){
1034         This->funcs->srcreg(This, &instr->predicate, buffer);
1035     }
1036     for(i = 0; i < instr->num_srcs; i++){
1037         This->funcs->srcreg(This, &instr->src[i], buffer);
1038     }
1039 }
1040 
1041 static DWORD map_ps13_temp(struct bc_writer *This, const struct shader_reg *reg) {
1042     if(reg->regnum == T0_REG) {
1043         return d3dsp_register( D3DSPR_TEXTURE, 0 );
1044     } else if(reg->regnum == T1_REG) {
1045         return d3dsp_register( D3DSPR_TEXTURE, 1 );
1046     } else if(reg->regnum == T2_REG) {
1047         return d3dsp_register( D3DSPR_TEXTURE, 2 );
1048     } else if(reg->regnum == T3_REG) {
1049         return d3dsp_register( D3DSPR_TEXTURE, 3 );
1050     } else {
1051         return d3dsp_register( D3DSPR_TEMP, reg->regnum );
1052     }
1053 }
1054 
1055 static DWORD map_ps_input(struct bc_writer *This,
1056                           const struct shader_reg *reg) {
1057     DWORD i;
1058     /* Map color interpolators */
1059     for(i = 0; i < 2; i++) {
1060         if(reg->regnum == This->v_regnum[i]) {
1061             return d3dsp_register( D3DSPR_INPUT, i );
1062         }
1063     }
1064     for(i = 0; i < 8; i++) {
1065         if(reg->regnum == This->t_regnum[i]) {
1066             return d3dsp_register( D3DSPR_TEXTURE, i );
1067         }
1068     }
1069 
1070     WARN("Invalid ps 1/2 varying\n");
1071     This->state = E_INVALIDARG;
1072     return 0;
1073 }
1074 
1075 static void ps_1_0123_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1076                              struct bytecode_buffer *buffer) {
1077     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1078     if(reg->rel_reg) {
1079         WARN("Relative addressing not supported in <= ps_3_0\n");
1080         This->state = E_INVALIDARG;
1081         return;
1082     }
1083 
1084     switch(reg->type) {
1085         case BWRITERSPR_INPUT:
1086             token |= map_ps_input(This, reg);
1087             break;
1088 
1089             /* Take care about the texture temporaries. There's a problem: They aren't
1090              * declared anywhere, so we can only hardcode the values that are used
1091              * to map ps_1_3 shaders to the common shader structure
1092              */
1093         case BWRITERSPR_TEMP:
1094             token |= map_ps13_temp(This, reg);
1095             break;
1096 
1097         case BWRITERSPR_CONST: /* Can be mapped 1:1 */
1098             token |= d3dsp_register( reg->type, reg->regnum );
1099             break;
1100 
1101         default:
1102             WARN("Invalid register type for <= ps_1_3 shader\n");
1103             This->state = E_INVALIDARG;
1104             return;
1105     }
1106 
1107     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1108 
1109     if(reg->srcmod == BWRITERSPSM_DZ || reg->srcmod == BWRITERSPSM_DW ||
1110        reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1111        reg->srcmod == BWRITERSPSM_NOT) {
1112         WARN("Invalid source modifier %u for <= ps_1_3\n", reg->srcmod);
1113         This->state = E_INVALIDARG;
1114         return;
1115     }
1116     token |= d3d9_srcmod(reg->srcmod);
1117     put_dword(buffer, token);
1118 }
1119 
1120 static void ps_1_0123_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1121                              struct bytecode_buffer *buffer,
1122                              DWORD shift, DWORD mod) {
1123     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1124 
1125     if(reg->rel_reg) {
1126         WARN("Relative addressing not supported for destination registers\n");
1127         This->state = E_INVALIDARG;
1128         return;
1129     }
1130 
1131     switch(reg->type) {
1132         case BWRITERSPR_TEMP:
1133             token |= map_ps13_temp(This, reg);
1134             break;
1135 
1136         /* texkill uses the input register as a destination parameter */
1137         case BWRITERSPR_INPUT:
1138             token |= map_ps_input(This, reg);
1139             break;
1140 
1141         default:
1142             WARN("Invalid dest register type for 1.x pshader\n");
1143             This->state = E_INVALIDARG;
1144             return;
1145     }
1146 
1147     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1148     token |= d3d9_dstmod(mod);
1149 
1150     token |= d3d9_writemask(reg->u.writemask);
1151     put_dword(buffer, token);
1152 }
1153 
1154 /* The length of an instruction consists of the destination register (if any),
1155  * the number of source registers, the number of address registers used for
1156  * indirect addressing, and optionally the predicate register
1157  */
1158 static DWORD instrlen(const struct instruction *instr, unsigned int srcs, unsigned int dsts) {
1159     unsigned int i;
1160     DWORD ret = srcs + dsts + (instr->has_predicate ? 1 : 0);
1161 
1162     if(dsts){
1163         if(instr->dst.rel_reg) ret++;
1164     }
1165     for(i = 0; i < srcs; i++) {
1166         if(instr->src[i].rel_reg) ret++;
1167     }
1168     return ret;
1169 }
1170 
1171 static void sm_1_x_opcode(struct bc_writer *This,
1172                           const struct instruction *instr,
1173                           DWORD token, struct bytecode_buffer *buffer) {
1174     /* In sm_1_x instruction length isn't encoded */
1175     if(instr->coissue){
1176         token |= D3DSI_COISSUE;
1177     }
1178     put_dword(buffer, token);
1179 }
1180 
1181 static void instr_handler(struct bc_writer *This,
1182                           const struct instruction *instr,
1183                           struct bytecode_buffer *buffer) {
1184     DWORD token = d3d9_opcode(instr->opcode);
1185 
1186     This->funcs->opcode(This, instr, token, buffer);
1187     if(instr->has_dst) This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1188     write_srcregs(This, instr, buffer);
1189 }
1190 
1191 static const struct instr_handler_table vs_1_x_handlers[] = {
1192     {BWRITERSIO_ADD,            instr_handler},
1193     {BWRITERSIO_NOP,            instr_handler},
1194     {BWRITERSIO_MOV,            instr_handler},
1195     {BWRITERSIO_SUB,            instr_handler},
1196     {BWRITERSIO_MAD,            instr_handler},
1197     {BWRITERSIO_MUL,            instr_handler},
1198     {BWRITERSIO_RCP,            instr_handler},
1199     {BWRITERSIO_RSQ,            instr_handler},
1200     {BWRITERSIO_DP3,            instr_handler},
1201     {BWRITERSIO_DP4,            instr_handler},
1202     {BWRITERSIO_MIN,            instr_handler},
1203     {BWRITERSIO_MAX,            instr_handler},
1204     {BWRITERSIO_SLT,            instr_handler},
1205     {BWRITERSIO_SGE,            instr_handler},
1206     {BWRITERSIO_EXP,            instr_handler},
1207     {BWRITERSIO_LOG,            instr_handler},
1208     {BWRITERSIO_EXPP,           instr_handler},
1209     {BWRITERSIO_LOGP,           instr_handler},
1210     {BWRITERSIO_DST,            instr_handler},
1211     {BWRITERSIO_FRC,            instr_handler},
1212     {BWRITERSIO_M4x4,           instr_handler},
1213     {BWRITERSIO_M4x3,           instr_handler},
1214     {BWRITERSIO_M3x4,           instr_handler},
1215     {BWRITERSIO_M3x3,           instr_handler},
1216     {BWRITERSIO_M3x2,           instr_handler},
1217     {BWRITERSIO_LIT,            instr_handler},
1218 
1219     {BWRITERSIO_END,            NULL}, /* Sentinel value, it signals
1220                                           the end of the list */
1221 };
1222 
1223 static const struct bytecode_backend vs_1_x_backend = {
1224     vs_1_x_header,
1225     end,
1226     vs_1_x_srcreg,
1227     vs_12_dstreg,
1228     sm_1_x_opcode,
1229     vs_1_x_handlers
1230 };
1231 
1232 static void instr_ps_1_0123_texld(struct bc_writer *This,
1233                                   const struct instruction *instr,
1234                                   struct bytecode_buffer *buffer) {
1235     DWORD idx;
1236     struct shader_reg reg;
1237     DWORD swizzlemask;
1238 
1239     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1240        instr->src[1].regnum > 3) {
1241         WARN("Unsupported sampler type %u regnum %u\n",
1242              instr->src[1].type, instr->src[1].regnum);
1243         This->state = E_INVALIDARG;
1244         return;
1245     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1246         WARN("Can only sample into a temp register\n");
1247         This->state = E_INVALIDARG;
1248         return;
1249     }
1250 
1251     idx = instr->src[1].regnum;
1252     if((idx == 0 && instr->dst.regnum != T0_REG) ||
1253        (idx == 1 && instr->dst.regnum != T1_REG) ||
1254        (idx == 2 && instr->dst.regnum != T2_REG) ||
1255        (idx == 3 && instr->dst.regnum != T3_REG)) {
1256         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_x\n",
1257              idx, instr->dst.regnum);
1258         This->state = E_INVALIDARG;
1259         return;
1260     }
1261     if(instr->src[0].type == BWRITERSPR_INPUT) {
1262         /* A simple non-dependent read tex instruction */
1263         if(instr->src[0].regnum != This->t_regnum[idx]) {
1264             WARN("Cannot sample from s%u with texture address data from interpolator %u\n",
1265                  idx, instr->src[0].regnum);
1266             This->state = E_INVALIDARG;
1267             return;
1268         }
1269         This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1270 
1271         /* map the temp dstreg to the ps_1_3 texture temporary register */
1272         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1273     } else if(instr->src[0].type == BWRITERSPR_TEMP) {
1274 
1275         swizzlemask = (3 << BWRITERVS_SWIZZLE_SHIFT) |
1276             (3 << (BWRITERVS_SWIZZLE_SHIFT + 2)) |
1277             (3 << (BWRITERVS_SWIZZLE_SHIFT + 4));
1278         if((instr->src[0].u.swizzle & swizzlemask) == (BWRITERVS_X_X | BWRITERVS_Y_Y | BWRITERVS_Z_Z)) {
1279             TRACE("writing texreg2rgb\n");
1280             This->funcs->opcode(This, instr, D3DSIO_TEXREG2RGB & D3DSI_OPCODE_MASK, buffer);
1281         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_W | BWRITERVS_Y_X | BWRITERVS_Z_X | BWRITERVS_W_X)) {
1282             TRACE("writing texreg2ar\n");
1283             This->funcs->opcode(This, instr, D3DSIO_TEXREG2AR & D3DSI_OPCODE_MASK, buffer);
1284         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_Y | BWRITERVS_Y_Z | BWRITERVS_Z_Z | BWRITERVS_W_Z)) {
1285             TRACE("writing texreg2gb\n");
1286             This->funcs->opcode(This, instr, D3DSIO_TEXREG2GB & D3DSI_OPCODE_MASK, buffer);
1287         } else {
1288             WARN("Unsupported src addr swizzle in dependent texld: 0x%08x\n", instr->src[0].u.swizzle);
1289             This->state = E_INVALIDARG;
1290             return;
1291         }
1292 
1293         /* Dst and src reg can be mapped normally. Both registers are temporary registers in the
1294          * source shader and have to be mapped to the temporary form of the texture registers. However,
1295          * the src reg doesn't have a swizzle
1296          */
1297         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1298         reg = instr->src[0];
1299         reg.u.swizzle = BWRITERVS_NOSWIZZLE;
1300         This->funcs->srcreg(This, &reg, buffer);
1301     } else {
1302         WARN("Invalid address data source register\n");
1303         This->state = E_INVALIDARG;
1304         return;
1305     }
1306 }
1307 
1308 static void instr_ps_1_0123_mov(struct bc_writer *This,
1309                                 const struct instruction *instr,
1310                                 struct bytecode_buffer *buffer) {
1311     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1312 
1313     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1314         if((instr->dst.regnum == T0_REG && instr->src[0].regnum == This->t_regnum[0]) ||
1315            (instr->dst.regnum == T1_REG && instr->src[0].regnum == This->t_regnum[1]) ||
1316            (instr->dst.regnum == T2_REG && instr->src[0].regnum == This->t_regnum[2]) ||
1317            (instr->dst.regnum == T3_REG && instr->src[0].regnum == This->t_regnum[3])) {
1318             if(instr->dstmod & BWRITERSPDM_SATURATE) {
1319                 This->funcs->opcode(This, instr, D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK, buffer);
1320                 /* Remove the SATURATE flag, it's implicit to the instruction */
1321                 This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod & (~BWRITERSPDM_SATURATE));
1322                 return;
1323             } else {
1324                 WARN("A varying -> temp copy is only supported with the SATURATE modifier in <=ps_1_3\n");
1325                 This->state = E_INVALIDARG;
1326                 return;
1327             }
1328         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1329                   instr->src[0].regnum == This->v_regnum[1]) {
1330             /* Handled by the normal mov below. Just drop out of the if condition */
1331         } else {
1332             WARN("Unsupported varying -> temp mov in <= ps_1_3\n");
1333             This->state = E_INVALIDARG;
1334             return;
1335         }
1336     }
1337 
1338     This->funcs->opcode(This, instr, token, buffer);
1339     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1340     This->funcs->srcreg(This, &instr->src[0], buffer);
1341 }
1342 
1343 static const struct instr_handler_table ps_1_0123_handlers[] = {
1344     {BWRITERSIO_ADD,            instr_handler},
1345     {BWRITERSIO_NOP,            instr_handler},
1346     {BWRITERSIO_MOV,            instr_ps_1_0123_mov},
1347     {BWRITERSIO_SUB,            instr_handler},
1348     {BWRITERSIO_MAD,            instr_handler},
1349     {BWRITERSIO_MUL,            instr_handler},
1350     {BWRITERSIO_DP3,            instr_handler},
1351     {BWRITERSIO_DP4,            instr_handler},
1352     {BWRITERSIO_LRP,            instr_handler},
1353 
1354     /* pshader instructions */
1355     {BWRITERSIO_CND,            instr_handler},
1356     {BWRITERSIO_CMP,            instr_handler},
1357     {BWRITERSIO_TEXKILL,        instr_handler},
1358     {BWRITERSIO_TEX,            instr_ps_1_0123_texld},
1359     {BWRITERSIO_TEXBEM,         instr_handler},
1360     {BWRITERSIO_TEXBEML,        instr_handler},
1361     {BWRITERSIO_TEXM3x2PAD,     instr_handler},
1362     {BWRITERSIO_TEXM3x3PAD,     instr_handler},
1363     {BWRITERSIO_TEXM3x3SPEC,    instr_handler},
1364     {BWRITERSIO_TEXM3x3VSPEC,   instr_handler},
1365     {BWRITERSIO_TEXM3x3TEX,     instr_handler},
1366     {BWRITERSIO_TEXM3x3,        instr_handler},
1367     {BWRITERSIO_TEXM3x2DEPTH,   instr_handler},
1368     {BWRITERSIO_TEXM3x2TEX,     instr_handler},
1369     {BWRITERSIO_TEXDP3,         instr_handler},
1370     {BWRITERSIO_TEXDP3TEX,      instr_handler},
1371     {BWRITERSIO_END,            NULL},
1372 };
1373 
1374 static const struct bytecode_backend ps_1_0123_backend = {
1375     ps_1_x_header,
1376     end,
1377     ps_1_0123_srcreg,
1378     ps_1_0123_dstreg,
1379     sm_1_x_opcode,
1380     ps_1_0123_handlers
1381 };
1382 
1383 static void ps_1_4_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1384                           struct bytecode_buffer *buffer) {
1385     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1386     if(reg->rel_reg) {
1387         WARN("Relative addressing not supported in <= ps_3_0\n");
1388         This->state = E_INVALIDARG;
1389         return;
1390     }
1391 
1392     switch(reg->type) {
1393         case BWRITERSPR_INPUT:
1394             token |= map_ps_input(This, reg);
1395             break;
1396 
1397         /* Can be mapped 1:1 */
1398         case BWRITERSPR_TEMP:
1399         case BWRITERSPR_CONST:
1400             token |= d3dsp_register( reg->type, reg->regnum );
1401             break;
1402 
1403         default:
1404             WARN("Invalid register type for ps_1_4 shader\n");
1405             This->state = E_INVALIDARG;
1406             return;
1407     }
1408 
1409     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1410 
1411     if(reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1412        reg->srcmod == BWRITERSPSM_NOT) {
1413         WARN("Invalid source modifier %u for ps_1_4\n", reg->srcmod);
1414         This->state = E_INVALIDARG;
1415         return;
1416     }
1417     token |= d3d9_srcmod(reg->srcmod);
1418     put_dword(buffer, token);
1419 }
1420 
1421 static void ps_1_4_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1422                           struct bytecode_buffer *buffer,
1423                           DWORD shift, DWORD mod) {
1424     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1425 
1426     if(reg->rel_reg) {
1427         WARN("Relative addressing not supported for destination registers\n");
1428         This->state = E_INVALIDARG;
1429         return;
1430     }
1431 
1432     switch(reg->type) {
1433         case BWRITERSPR_TEMP: /* 1:1 mapping */
1434             token |= d3dsp_register( reg->type, reg->regnum );
1435             break;
1436 
1437 	/* For texkill */
1438         case BWRITERSPR_INPUT:
1439             token |= map_ps_input(This, reg);
1440             break;
1441 
1442         default:
1443             WARN("Invalid dest register type for 1.x pshader\n");
1444             This->state = E_INVALIDARG;
1445             return;
1446     }
1447 
1448     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1449     token |= d3d9_dstmod(mod);
1450 
1451     token |= d3d9_writemask(reg->u.writemask);
1452     put_dword(buffer, token);
1453 }
1454 
1455 static void instr_ps_1_4_mov(struct bc_writer *This,
1456                              const struct instruction *instr,
1457                              struct bytecode_buffer *buffer) {
1458     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1459 
1460     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1461         if(instr->src[0].regnum == This->t_regnum[0] ||
1462            instr->src[0].regnum == This->t_regnum[1] ||
1463            instr->src[0].regnum == This->t_regnum[2] ||
1464            instr->src[0].regnum == This->t_regnum[3] ||
1465            instr->src[0].regnum == This->t_regnum[4] ||
1466            instr->src[0].regnum == This->t_regnum[5]) {
1467             /* Similar to a regular mov, but a different opcode */
1468             token = D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK;
1469         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1470                   instr->src[0].regnum == This->v_regnum[1]) {
1471             /* Handled by the normal mov below. Just drop out of the if condition */
1472         } else {
1473             WARN("Unsupported varying -> temp mov in ps_1_4\n");
1474             This->state = E_INVALIDARG;
1475             return;
1476         }
1477     }
1478 
1479     This->funcs->opcode(This, instr, token, buffer);
1480     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1481     This->funcs->srcreg(This, &instr->src[0], buffer);
1482 }
1483 
1484 static void instr_ps_1_4_texld(struct bc_writer *This,
1485                                const struct instruction *instr,
1486                                struct bytecode_buffer *buffer) {
1487     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1488        instr->src[1].regnum > 5) {
1489         WARN("Unsupported sampler type %u regnum %u\n",
1490              instr->src[1].type, instr->src[1].regnum);
1491         This->state = E_INVALIDARG;
1492         return;
1493     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1494         WARN("Can only sample into a temp register\n");
1495         This->state = E_INVALIDARG;
1496         return;
1497     }
1498 
1499     if(instr->src[1].regnum != instr->dst.regnum) {
1500         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_4\n",
1501              instr->src[1].regnum, instr->dst.regnum);
1502         This->state = E_INVALIDARG;
1503         return;
1504     }
1505 
1506     This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1507     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1508     This->funcs->srcreg(This, &instr->src[0], buffer);
1509 }
1510 
/* Instruction handler table for ps_1_4.
 *
 * Each entry pairs a BWRITERSIO_* opcode with the function that writes it.
 * MOV and TEX use the ps_1_4 specific handlers, every other listed opcode
 * goes through instr_handler. The table is closed by a BWRITERSIO_END entry
 * with a NULL handler.
 */
static const struct instr_handler_table ps_1_4_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_ps_1_4_mov},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},

    /* pshader instructions */
    {BWRITERSIO_CND,            instr_handler},
    {BWRITERSIO_CMP,            instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},
    {BWRITERSIO_TEX,            instr_ps_1_4_texld},
    {BWRITERSIO_TEXDEPTH,       instr_handler},
    {BWRITERSIO_BEM,            instr_handler},

    {BWRITERSIO_PHASE,          instr_handler},
    /* Table terminator */
    {BWRITERSIO_END,            NULL},
};
1533 
/* Backend description for ps_1_4 shaders.
 *
 * Positional initializer; the member order is given by struct
 * bytecode_backend, which is declared outside of this part of the file.
 */
static const struct bytecode_backend ps_1_4_backend = {
    ps_1_4_header,
    end,
    ps_1_4_srcreg,
    ps_1_4_dstreg,
    sm_1_x_opcode,
    ps_1_4_handlers
};
1542 
1543 static void write_constB(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1544     write_const(shader->constB, shader->num_cb, D3DSIO_DEFB, D3DSPR_CONSTBOOL, buffer, len);
1545 }
1546 
1547 static void write_constI(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1548     write_const(shader->constI, shader->num_ci, D3DSIO_DEFI, D3DSPR_CONSTINT, buffer, len);
1549 }
1550 
1551 static void vs_2_header(struct bc_writer *This,
1552                         const struct bwriter_shader *shader,
1553                         struct bytecode_buffer *buffer) {
1554     HRESULT hr;
1555 
1556     hr = vs_find_builtin_varyings(This, shader);
1557     if(FAILED(hr)) {
1558         This->state = hr;
1559         return;
1560     }
1561 
1562     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1563     write_constF(shader, buffer, TRUE);
1564     write_constB(shader, buffer, TRUE);
1565     write_constI(shader, buffer, TRUE);
1566 }
1567 
1568 static void vs_2_srcreg(struct bc_writer *This,
1569                         const struct shader_reg *reg,
1570                         struct bytecode_buffer *buffer) {
1571     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1572     DWORD has_swizzle;
1573     DWORD component;
1574     DWORD d3d9reg;
1575 
1576     switch(reg->type) {
1577         case BWRITERSPR_OUTPUT:
1578             /* Map the swizzle to a writemask, the format expected
1579                by map_vs_output
1580              */
1581             switch(reg->u.swizzle) {
1582                 case BWRITERVS_SWIZZLE_X:
1583                     component = BWRITERSP_WRITEMASK_0;
1584                     break;
1585                 case BWRITERVS_SWIZZLE_Y:
1586                     component = BWRITERSP_WRITEMASK_1;
1587                     break;
1588                 case BWRITERVS_SWIZZLE_Z:
1589                     component = BWRITERSP_WRITEMASK_2;
1590                     break;
1591                 case BWRITERVS_SWIZZLE_W:
1592                     component = BWRITERSP_WRITEMASK_3;
1593                     break;
1594                 default:
1595                     component = 0;
1596             }
1597             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
1598             break;
1599 
1600         case BWRITERSPR_RASTOUT:
1601         case BWRITERSPR_ATTROUT:
1602             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
1603              * but are unexpected. If we hit this path it might be due to an error.
1604              */
1605             FIXME("Unexpected register type %u\n", reg->type);
1606             /* drop through */
1607         case BWRITERSPR_INPUT:
1608         case BWRITERSPR_TEMP:
1609         case BWRITERSPR_CONST:
1610         case BWRITERSPR_ADDR:
1611         case BWRITERSPR_CONSTINT:
1612         case BWRITERSPR_CONSTBOOL:
1613         case BWRITERSPR_LABEL:
1614             d3d9reg = d3d9_register(reg->type);
1615             token |= d3dsp_register( d3d9reg, reg->regnum );
1616             break;
1617 
1618         case BWRITERSPR_LOOP:
1619             if(reg->regnum != 0) {
1620                 WARN("Only regnum 0 is supported for the loop index register in vs_2_0\n");
1621                 This->state = E_INVALIDARG;
1622                 return;
1623             }
1624             token |= d3dsp_register( D3DSPR_LOOP, 0 );
1625             break;
1626 
1627         case BWRITERSPR_PREDICATE:
1628             if(This->version != BWRITERVS_VERSION(2, 1)){
1629                 WARN("Predicate register is allowed only in vs_2_x\n");
1630                 This->state = E_INVALIDARG;
1631                 return;
1632             }
1633             if(reg->regnum > 0) {
1634                 WARN("Only predicate register 0 is supported\n");
1635                 This->state = E_INVALIDARG;
1636                 return;
1637             }
1638             token |= d3dsp_register( D3DSPR_PREDICATE, 0 );
1639             break;
1640 
1641         default:
1642             WARN("Invalid register type for 2.0 vshader\n");
1643             This->state = E_INVALIDARG;
1644             return;
1645     }
1646 
1647     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1648 
1649     token |= d3d9_srcmod(reg->srcmod);
1650 
1651     if(reg->rel_reg)
1652         token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1653 
1654     put_dword(buffer, token);
1655 
1656     /* vs_2_0 and newer write the register containing the index explicitly in the
1657      * binary code
1658      */
1659     if(token & D3DVS_ADDRMODE_RELATIVE)
1660         vs_2_srcreg(This, reg->rel_reg, buffer);
1661 }
1662 
1663 static void sm_2_opcode(struct bc_writer *This,
1664                         const struct instruction *instr,
1665                         DWORD token, struct bytecode_buffer *buffer) {
1666     /* From sm 2 onwards instruction length is encoded in the opcode field */
1667     int dsts = instr->has_dst ? 1 : 0;
1668     token |= instrlen(instr, instr->num_srcs, dsts) << D3DSI_INSTLENGTH_SHIFT;
1669     if(instr->comptype)
1670         token |= (d3d9_comparetype(instr->comptype) << 16) & (0xf << 16);
1671     if(instr->has_predicate)
1672         token |= D3DSHADER_INSTRUCTION_PREDICATED;
1673     put_dword(buffer,token);
1674 }
1675 
/* Instruction handler table for vs_2_0.
 *
 * Each entry pairs a BWRITERSIO_* opcode with the function that writes it;
 * all listed opcodes use instr_handler. The table is closed by a
 * BWRITERSIO_END entry with a NULL handler.
 */
static const struct instr_handler_table vs_2_0_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_SLT,            instr_handler},
    {BWRITERSIO_SGE,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_DST,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_SGN,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_LIT,            instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_MOVA,           instr_handler},

    /* Flow control instructions */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_LOOP,           instr_handler},
    {BWRITERSIO_RET,            instr_handler},
    {BWRITERSIO_ENDLOOP,        instr_handler},

    /* Table terminator */
    {BWRITERSIO_END,            NULL},
};
1726 
/* Backend description for vs_2_0 shaders.
 *
 * Positional initializer; the member order is given by struct
 * bytecode_backend, which is declared outside of this part of the file.
 * Destination registers are written with the vs_12_dstreg function.
 */
static const struct bytecode_backend vs_2_0_backend = {
    vs_2_header,
    end,
    vs_2_srcreg,
    vs_12_dstreg,
    sm_2_opcode,
    vs_2_0_handlers
};
1735 
/* Instruction handler table for vs_2_x.
 *
 * Each entry pairs a BWRITERSIO_* opcode with the function that writes it;
 * all listed opcodes use instr_handler. Compared to vs_2_0_handlers this
 * table additionally lists IFC, BREAK, BREAKC, SETP and BREAKP. The table is
 * closed by a BWRITERSIO_END entry with a NULL handler.
 */
static const struct instr_handler_table vs_2_x_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_SLT,            instr_handler},
    {BWRITERSIO_SGE,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_DST,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_SGN,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_LIT,            instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_MOVA,           instr_handler},

    /* Flow control instructions */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_IFC,            instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_BREAK,          instr_handler},
    {BWRITERSIO_BREAKC,         instr_handler},
    {BWRITERSIO_LOOP,           instr_handler},
    {BWRITERSIO_RET,            instr_handler},
    {BWRITERSIO_ENDLOOP,        instr_handler},

    /* Predicate related instructions */
    {BWRITERSIO_SETP,           instr_handler},
    {BWRITERSIO_BREAKP,         instr_handler},

    /* Table terminator */
    {BWRITERSIO_END,            NULL},
};
1792 
/* Backend description for vs_2_x shaders.
 *
 * Positional initializer; the member order is given by struct
 * bytecode_backend, which is declared outside of this part of the file.
 * Identical to vs_2_0_backend except for the instruction handler table.
 */
static const struct bytecode_backend vs_2_x_backend = {
    vs_2_header,
    end,
    vs_2_srcreg,
    vs_12_dstreg,
    sm_2_opcode,
    vs_2_x_handlers
};
1801 
1802 static void write_samplers(const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1803     DWORD i;
1804     DWORD instr_dcl = D3DSIO_DCL | (2 << D3DSI_INSTLENGTH_SHIFT);
1805     DWORD token;
1806     const DWORD reg = (1u << 31) | d3dsp_register( D3DSPR_SAMPLER, 0 ) | D3DSP_WRITEMASK_ALL;
1807 
1808     for(i = 0; i < shader->num_samplers; i++) {
1809         /* Write the DCL instruction */
1810         put_dword(buffer, instr_dcl);
1811         token = (1u << 31);
1812         /* Already shifted */
1813         token |= (d3d9_sampler(shader->samplers[i].type)) & D3DSP_TEXTURETYPE_MASK;
1814         put_dword(buffer, token);
1815         token = reg | (shader->samplers[i].regnum & D3DSP_REGNUM_MASK);
1816         token |= d3d9_dstmod(shader->samplers[i].mod);
1817         put_dword(buffer, token);
1818     }
1819 }
1820 
1821 static void ps_2_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1822     HRESULT hr = find_ps_builtin_semantics(This, shader, 8);
1823     if(FAILED(hr)) {
1824         This->state = hr;
1825         return;
1826     }
1827 
1828     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1829     write_samplers(shader, buffer);
1830     write_constF(shader, buffer, TRUE);
1831     write_constB(shader, buffer, TRUE);
1832     write_constI(shader, buffer, TRUE);
1833 }
1834 
1835 static void ps_2_srcreg(struct bc_writer *This,
1836                         const struct shader_reg *reg,
1837                         struct bytecode_buffer *buffer) {
1838     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1839     DWORD d3d9reg;
1840     if(reg->rel_reg) {
1841         WARN("Relative addressing not supported in <= ps_3_0\n");
1842         This->state = E_INVALIDARG;
1843         return;
1844     }
1845 
1846     switch(reg->type) {
1847         case BWRITERSPR_INPUT:
1848             token |= map_ps_input(This, reg);
1849             break;
1850 
1851             /* Can be mapped 1:1 */
1852         case BWRITERSPR_TEMP:
1853         case BWRITERSPR_CONST:
1854         case BWRITERSPR_COLOROUT:
1855         case BWRITERSPR_CONSTBOOL:
1856         case BWRITERSPR_CONSTINT:
1857         case BWRITERSPR_SAMPLER:
1858         case BWRITERSPR_LABEL:
1859         case BWRITERSPR_DEPTHOUT:
1860             d3d9reg = d3d9_register(reg->type);
1861             token |= d3dsp_register( d3d9reg, reg->regnum );
1862             break;
1863 
1864         case BWRITERSPR_PREDICATE:
1865             if(This->version != BWRITERPS_VERSION(2, 1)){
1866                 WARN("Predicate register not supported in ps_2_0\n");
1867                 This->state = E_INVALIDARG;
1868             }
1869             if(reg->regnum) {
1870                 WARN("Predicate register with regnum %u not supported\n",
1871                      reg->regnum);
1872                 This->state = E_INVALIDARG;
1873             }
1874             token |= d3dsp_register( D3DSPR_PREDICATE, 0 );
1875             break;
1876 
1877         default:
1878             WARN("Invalid register type for ps_2_0 shader\n");
1879             This->state = E_INVALIDARG;
1880             return;
1881     }
1882 
1883     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1884 
1885     token |= d3d9_srcmod(reg->srcmod);
1886     put_dword(buffer, token);
1887 }
1888 
1889 static void ps_2_0_dstreg(struct bc_writer *This,
1890                           const struct shader_reg *reg,
1891                           struct bytecode_buffer *buffer,
1892                           DWORD shift, DWORD mod) {
1893     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1894     DWORD d3d9reg;
1895 
1896     if(reg->rel_reg) {
1897         WARN("Relative addressing not supported for destination registers\n");
1898         This->state = E_INVALIDARG;
1899         return;
1900     }
1901 
1902     switch(reg->type) {
1903         case BWRITERSPR_TEMP: /* 1:1 mapping */
1904         case BWRITERSPR_COLOROUT:
1905         case BWRITERSPR_DEPTHOUT:
1906             d3d9reg = d3d9_register(reg->type);
1907             token |= d3dsp_register( d3d9reg, reg->regnum );
1908             break;
1909 
1910         case BWRITERSPR_PREDICATE:
1911             if(This->version != BWRITERPS_VERSION(2, 1)){
1912                 WARN("Predicate register not supported in ps_2_0\n");
1913                 This->state = E_INVALIDARG;
1914             }
1915             token |= d3dsp_register( D3DSPR_PREDICATE, reg->regnum );
1916             break;
1917 
1918 	/* texkill uses the input register as a destination parameter */
1919         case BWRITERSPR_INPUT:
1920             token |= map_ps_input(This, reg);
1921             break;
1922 
1923         default:
1924             WARN("Invalid dest register type for 2.x pshader\n");
1925             This->state = E_INVALIDARG;
1926             return;
1927     }
1928 
1929     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1930     token |= d3d9_dstmod(mod);
1931 
1932     token |= d3d9_writemask(reg->u.writemask);
1933     put_dword(buffer, token);
1934 }
1935 
/* Instruction handler table for ps_2_0.
 *
 * Each entry pairs a BWRITERSIO_* opcode with the function that writes it;
 * all listed opcodes use instr_handler. The table is closed by a
 * BWRITERSIO_END entry with a NULL handler.
 */
static const struct instr_handler_table ps_2_0_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_DP2ADD,         instr_handler},
    {BWRITERSIO_CMP,            instr_handler},

    /* Texture instructions */
    {BWRITERSIO_TEX,            instr_handler},
    {BWRITERSIO_TEXLDP,         instr_handler},
    {BWRITERSIO_TEXLDB,         instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},

    /* Table terminator */
    {BWRITERSIO_END,            NULL},
};
1975 
/* Backend description for ps_2_0 shaders.
 *
 * Positional initializer; the member order is given by struct
 * bytecode_backend, which is declared outside of this part of the file.
 */
static const struct bytecode_backend ps_2_0_backend = {
    ps_2_header,
    end,
    ps_2_srcreg,
    ps_2_0_dstreg,
    sm_2_opcode,
    ps_2_0_handlers
};
1984 
/* Instruction handler table for ps_2_x.
 *
 * Each entry pairs a BWRITERSIO_* opcode with the function that writes it;
 * all listed opcodes use instr_handler. On top of the opcodes listed in
 * ps_2_0_handlers this table contains flow control, DSX / DSY, the
 * predicate instructions and TEXLDD. The table is closed by a
 * BWRITERSIO_END entry with a NULL handler.
 */
static const struct instr_handler_table ps_2_x_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_DP2ADD,         instr_handler},
    {BWRITERSIO_CMP,            instr_handler},

    /* Flow control instructions */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_IFC,            instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_BREAK,          instr_handler},
    {BWRITERSIO_BREAKC,         instr_handler},
    {BWRITERSIO_RET,            instr_handler},

    /* Texture and derivative instructions */
    {BWRITERSIO_TEX,            instr_handler},
    {BWRITERSIO_TEXLDP,         instr_handler},
    {BWRITERSIO_TEXLDB,         instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},
    {BWRITERSIO_DSX,            instr_handler},
    {BWRITERSIO_DSY,            instr_handler},

    /* Predicate related instructions */
    {BWRITERSIO_SETP,           instr_handler},
    {BWRITERSIO_BREAKP,         instr_handler},

    {BWRITERSIO_TEXLDD,         instr_handler},

    /* Table terminator */
    {BWRITERSIO_END,            NULL},
};
2044 
/* Backend description for ps_2_x shaders.
 *
 * Positional initializer; the member order is given by struct
 * bytecode_backend, which is declared outside of this part of the file.
 * Identical to ps_2_0_backend except for the instruction handler table.
 */
static const struct bytecode_backend ps_2_x_backend = {
    ps_2_header,
    end,
    ps_2_srcreg,
    ps_2_0_dstreg,
    sm_2_opcode,
    ps_2_x_handlers
};
2053 
2054 static void sm_3_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
2055     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
2056     write_declarations(This, buffer, TRUE, shader->outputs, shader->num_outputs, BWRITERSPR_OUTPUT);
2057     write_constF(shader, buffer, TRUE);
2058     write_constB(shader, buffer, TRUE);
2059     write_constI(shader, buffer, TRUE);
2060     write_samplers(shader, buffer);
2061 }
2062 
2063 static void sm_3_srcreg(struct bc_writer *This,
2064                         const struct shader_reg *reg,
2065                         struct bytecode_buffer *buffer) {
2066     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
2067     DWORD d3d9reg;
2068 
2069     d3d9reg = d3d9_register(reg->type);
2070     token |= d3dsp_register( d3d9reg, reg->regnum );
2071     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK;
2072     token |= d3d9_srcmod(reg->srcmod);
2073 
2074     if(reg->rel_reg) {
2075         if(reg->type == BWRITERSPR_CONST && This->version == BWRITERPS_VERSION(3, 0)) {
2076             WARN("c%u[...] is unsupported in ps_3_0\n", reg->regnum);
2077             This->state = E_INVALIDARG;
2078             return;
2079         }
2080         if(((reg->rel_reg->type == BWRITERSPR_ADDR && This->version == BWRITERVS_VERSION(3, 0)) ||
2081            reg->rel_reg->type == BWRITERSPR_LOOP) &&
2082            reg->rel_reg->regnum == 0) {
2083             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
2084         } else {
2085             WARN("Unsupported relative addressing register\n");
2086             This->state = E_INVALIDARG;
2087             return;
2088         }
2089     }
2090 
2091     put_dword(buffer, token);
2092 
2093     /* vs_2_0 and newer write the register containing the index explicitly in the
2094      * binary code
2095      */
2096     if(token & D3DVS_ADDRMODE_RELATIVE) {
2097         sm_3_srcreg(This, reg->rel_reg, buffer);
2098     }
2099 }
2100 
2101 static void sm_3_dstreg(struct bc_writer *This,
2102                         const struct shader_reg *reg,
2103                         struct bytecode_buffer *buffer,
2104                         DWORD shift, DWORD mod) {
2105     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
2106     DWORD d3d9reg;
2107 
2108     if(reg->rel_reg) {
2109         if(This->version == BWRITERVS_VERSION(3, 0) &&
2110            reg->type == BWRITERSPR_OUTPUT) {
2111             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
2112         } else {
2113             WARN("Relative addressing not supported for this shader type or register type\n");
2114             This->state = E_INVALIDARG;
2115             return;
2116         }
2117     }
2118 
2119     d3d9reg = d3d9_register(reg->type);
2120     token |= d3dsp_register( d3d9reg, reg->regnum );
2121     token |= d3d9_dstmod(mod);
2122     token |= d3d9_writemask(reg->u.writemask);
2123     put_dword(buffer, token);
2124 
2125     /* vs_2_0 and newer write the register containing the index explicitly in the
2126      * binary code
2127      */
2128     if(token & D3DVS_ADDRMODE_RELATIVE) {
2129         sm_3_srcreg(This, reg->rel_reg, buffer);
2130     }
2131 }
2132 
/* Instruction handler table for vs_3_0.
 *
 * Each entry pairs a BWRITERSIO_* opcode with the function that writes it;
 * all listed opcodes use instr_handler. Compared to vs_2_x_handlers this
 * table additionally lists TEXLDL. The table is closed by a BWRITERSIO_END
 * entry with a NULL handler.
 */
static const struct instr_handler_table vs_3_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_SLT,            instr_handler},
    {BWRITERSIO_SGE,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_DST,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_SGN,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_LIT,            instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_MOVA,           instr_handler},

    /* Flow control instructions */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_IFC,            instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_BREAK,          instr_handler},
    {BWRITERSIO_BREAKC,         instr_handler},
    {BWRITERSIO_LOOP,           instr_handler},
    {BWRITERSIO_RET,            instr_handler},
    {BWRITERSIO_ENDLOOP,        instr_handler},

    {BWRITERSIO_SETP,           instr_handler},
    {BWRITERSIO_BREAKP,         instr_handler},
    {BWRITERSIO_TEXLDL,         instr_handler},

    /* Table terminator */
    {BWRITERSIO_END,            NULL},
};
2190 
/* Backend description for vs_3_0 shaders.
 *
 * Positional initializer; the member order is given by struct
 * bytecode_backend, which is declared outside of this part of the file.
 * Uses the sm_3 header and register functions together with sm_2_opcode.
 */
static const struct bytecode_backend vs_3_backend = {
    sm_3_header,
    end,
    sm_3_srcreg,
    sm_3_dstreg,
    sm_2_opcode,
    vs_3_handlers
};
2199 
/* Instruction handler table for ps_3_0.
 *
 * Each entry pairs a BWRITERSIO_* opcode with the function that writes it;
 * all listed opcodes use instr_handler. Compared to ps_2_x_handlers this
 * table additionally lists LOOP, ENDLOOP and TEXLDL. The table is closed by
 * a BWRITERSIO_END entry with a NULL handler.
 */
static const struct instr_handler_table ps_3_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_DP2ADD,         instr_handler},
    {BWRITERSIO_CMP,            instr_handler},

    /* Flow control instructions */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_IFC,            instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_BREAK,          instr_handler},
    {BWRITERSIO_BREAKC,         instr_handler},
    {BWRITERSIO_LOOP,           instr_handler},
    {BWRITERSIO_RET,            instr_handler},
    {BWRITERSIO_ENDLOOP,        instr_handler},

    {BWRITERSIO_SETP,           instr_handler},
    {BWRITERSIO_BREAKP,         instr_handler},
    {BWRITERSIO_TEXLDL,         instr_handler},

    /* Texture and derivative instructions */
    {BWRITERSIO_TEX,            instr_handler},
    {BWRITERSIO_TEXLDP,         instr_handler},
    {BWRITERSIO_TEXLDB,         instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},
    {BWRITERSIO_DSX,            instr_handler},
    {BWRITERSIO_DSY,            instr_handler},
    {BWRITERSIO_TEXLDD,         instr_handler},

    /* Table terminator */
    {BWRITERSIO_END,            NULL},
};
2261 
/* Backend description for ps_3_0 shaders.
 *
 * Positional initializer; the member order is given by struct
 * bytecode_backend, which is declared outside of this part of the file.
 * Identical to vs_3_backend except for the instruction handler table.
 */
static const struct bytecode_backend ps_3_backend = {
    sm_3_header,
    end,
    sm_3_srcreg,
    sm_3_dstreg,
    sm_2_opcode,
    ps_3_handlers
};
2270 
2271 static void init_vs10_dx9_writer(struct bc_writer *writer) {
2272     TRACE("Creating DirectX9 vertex shader 1.0 writer\n");
2273     writer->funcs = &vs_1_x_backend;
2274 }
2275 
2276 static void init_vs11_dx9_writer(struct bc_writer *writer) {
2277     TRACE("Creating DirectX9 vertex shader 1.1 writer\n");
2278     writer->funcs = &vs_1_x_backend;
2279 }
2280 
2281 static void init_vs20_dx9_writer(struct bc_writer *writer) {
2282     TRACE("Creating DirectX9 vertex shader 2.0 writer\n");
2283     writer->funcs = &vs_2_0_backend;
2284 }
2285 
2286 static void init_vs2x_dx9_writer(struct bc_writer *writer) {
2287     TRACE("Creating DirectX9 vertex shader 2.x writer\n");
2288     writer->funcs = &vs_2_x_backend;
2289 }
2290 
2291 static void init_vs30_dx9_writer(struct bc_writer *writer) {
2292     TRACE("Creating DirectX9 vertex shader 3.0 writer\n");
2293     writer->funcs = &vs_3_backend;
2294 }
2295 
2296 static void init_ps10_dx9_writer(struct bc_writer *writer) {
2297     TRACE("Creating DirectX9 pixel shader 1.0 writer\n");
2298     writer->funcs = &ps_1_0123_backend;
2299 }
2300 
2301 static void init_ps11_dx9_writer(struct bc_writer *writer) {
2302     TRACE("Creating DirectX9 pixel shader 1.1 writer\n");
2303     writer->funcs = &ps_1_0123_backend;
2304 }
2305 
2306 static void init_ps12_dx9_writer(struct bc_writer *writer) {
2307     TRACE("Creating DirectX9 pixel shader 1.2 writer\n");
2308     writer->funcs = &ps_1_0123_backend;
2309 }
2310 
2311 static void init_ps13_dx9_writer(struct bc_writer *writer) {
2312     TRACE("Creating DirectX9 pixel shader 1.3 writer\n");
2313     writer->funcs = &ps_1_0123_backend;
2314 }
2315 
2316 static void init_ps14_dx9_writer(struct bc_writer *writer) {
2317     TRACE("Creating DirectX9 pixel shader 1.4 writer\n");
2318     writer->funcs = &ps_1_4_backend;
2319 }
2320 
2321 static void init_ps20_dx9_writer(struct bc_writer *writer) {
2322     TRACE("Creating DirectX9 pixel shader 2.0 writer\n");
2323     writer->funcs = &ps_2_0_backend;
2324 }
2325 
2326 static void init_ps2x_dx9_writer(struct bc_writer *writer) {
2327     TRACE("Creating DirectX9 pixel shader 2.x writer\n");
2328     writer->funcs = &ps_2_x_backend;
2329 }
2330 
2331 static void init_ps30_dx9_writer(struct bc_writer *writer) {
2332     TRACE("Creating DirectX9 pixel shader 3.0 writer\n");
2333     writer->funcs = &ps_3_backend;
2334 }
2335 
2336 static struct bc_writer *create_writer(DWORD version, DWORD dxversion) {
2337     struct bc_writer *ret = d3dcompiler_alloc(sizeof(*ret));
2338 
2339     if(!ret) {
2340         WARN("Failed to allocate a bytecode writer instance\n");
2341         return NULL;
2342     }
2343 
2344     switch(version) {
2345         case BWRITERVS_VERSION(1, 0):
2346             if(dxversion != 9) {
2347                 WARN("Unsupported dxversion for vertex shader 1.0 requested: %u\n", dxversion);
2348                 goto fail;
2349             }
2350             init_vs10_dx9_writer(ret);
2351             break;
2352         case BWRITERVS_VERSION(1, 1):
2353             if(dxversion != 9) {
2354                 WARN("Unsupported dxversion for vertex shader 1.1 requested: %u\n", dxversion);
2355                 goto fail;
2356             }
2357             init_vs11_dx9_writer(ret);
2358             break;
2359         case BWRITERVS_VERSION(2, 0):
2360             if(dxversion != 9) {
2361                 WARN("Unsupported dxversion for vertex shader 2.0 requested: %u\n", dxversion);
2362                 goto fail;
2363             }
2364             init_vs20_dx9_writer(ret);
2365             break;
2366         case BWRITERVS_VERSION(2, 1):
2367             if(dxversion != 9) {
2368                 WARN("Unsupported dxversion for vertex shader 2.x requested: %u\n", dxversion);
2369                 goto fail;
2370             }
2371             init_vs2x_dx9_writer(ret);
2372             break;
2373         case BWRITERVS_VERSION(3, 0):
2374             if(dxversion != 9) {
2375                 WARN("Unsupported dxversion for vertex shader 3.0 requested: %u\n", dxversion);
2376                 goto fail;
2377             }
2378             init_vs30_dx9_writer(ret);
2379             break;
2380 
2381         case BWRITERPS_VERSION(1, 0):
2382             if(dxversion != 9) {
2383                 WARN("Unsupported dxversion for pixel shader 1.0 requested: %u\n", dxversion);
2384                 goto fail;
2385             }
2386             init_ps10_dx9_writer(ret);
2387             break;
2388         case BWRITERPS_VERSION(1, 1):
2389             if(dxversion != 9) {
2390                 WARN("Unsupported dxversion for pixel shader 1.1 requested: %u\n", dxversion);
2391                 goto fail;
2392             }
2393             init_ps11_dx9_writer(ret);
2394             break;
2395         case BWRITERPS_VERSION(1, 2):
2396             if(dxversion != 9) {
2397                 WARN("Unsupported dxversion for pixel shader 1.2 requested: %u\n", dxversion);
2398                 goto fail;
2399             }
2400             init_ps12_dx9_writer(ret);
2401             break;
2402         case BWRITERPS_VERSION(1, 3):
2403             if(dxversion != 9) {
2404                 WARN("Unsupported dxversion for pixel shader 1.3 requested: %u\n", dxversion);
2405                 goto fail;
2406             }
2407             init_ps13_dx9_writer(ret);
2408             break;
2409         case BWRITERPS_VERSION(1, 4):
2410             if(dxversion != 9) {
2411                 WARN("Unsupported dxversion for pixel shader 1.4 requested: %u\n", dxversion);
2412                 goto fail;
2413             }
2414             init_ps14_dx9_writer(ret);
2415             break;
2416 
2417         case BWRITERPS_VERSION(2, 0):
2418             if(dxversion != 9) {
2419                 WARN("Unsupported dxversion for pixel shader 2.0 requested: %u\n", dxversion);
2420                 goto fail;
2421             }
2422             init_ps20_dx9_writer(ret);
2423             break;
2424 
2425         case BWRITERPS_VERSION(2, 1):
2426             if(dxversion != 9) {
2427                 WARN("Unsupported dxversion for pixel shader 2.x requested: %u\n", dxversion);
2428                 goto fail;
2429             }
2430             init_ps2x_dx9_writer(ret);
2431             break;
2432 
2433         case BWRITERPS_VERSION(3, 0):
2434             if(dxversion != 9) {
2435                 WARN("Unsupported dxversion for pixel shader 3.0 requested: %u\n", dxversion);
2436                 goto fail;
2437             }
2438             init_ps30_dx9_writer(ret);
2439             break;
2440 
2441         default:
2442             WARN("Unexpected shader version requested: %08x\n", version);
2443             goto fail;
2444     }
2445     ret->version = version;
2446     return ret;
2447 
2448 fail:
2449     d3dcompiler_free(ret);
2450     return NULL;
2451 }
2452 
2453 static HRESULT call_instr_handler(struct bc_writer *writer,
2454                                   const struct instruction *instr,
2455                                   struct bytecode_buffer *buffer) {
2456     DWORD i=0;
2457 
2458     while(writer->funcs->instructions[i].opcode != BWRITERSIO_END) {
2459         if(instr->opcode == writer->funcs->instructions[i].opcode) {
2460             if(!writer->funcs->instructions[i].func) {
2461                 WARN("Opcode %u not supported by this profile\n", instr->opcode);
2462                 return E_INVALIDARG;
2463             }
2464             writer->funcs->instructions[i].func(writer, instr, buffer);
2465             return S_OK;
2466         }
2467         i++;
2468     }
2469 
2470     FIXME("Unhandled instruction %u - %s\n", instr->opcode,
2471           debug_print_opcode(instr->opcode));
2472     return E_INVALIDARG;
2473 }
2474 
2475 HRESULT SlWriteBytecode(const struct bwriter_shader *shader, int dxversion, DWORD **result, DWORD *size)
2476 {
2477     struct bc_writer *writer;
2478     struct bytecode_buffer *buffer = NULL;
2479     HRESULT hr;
2480     unsigned int i;
2481 
2482     if(!shader){
2483         ERR("NULL shader structure, aborting\n");
2484         return E_FAIL;
2485     }
2486     writer = create_writer(shader->version, dxversion);
2487     *result = NULL;
2488 
2489     if(!writer) {
2490         WARN("Could not create a bytecode writer instance. Either unsupported version\n");
2491         WARN("or out of memory\n");
2492         hr = E_FAIL;
2493         goto error;
2494     }
2495 
2496     buffer = allocate_buffer();
2497     if(!buffer) {
2498         WARN("Failed to allocate a buffer for the shader bytecode\n");
2499         hr = E_FAIL;
2500         goto error;
2501     }
2502 
2503     /* Write shader type and version */
2504     put_dword(buffer, shader->version);
2505 
2506     writer->funcs->header(writer, shader, buffer);
2507     if(FAILED(writer->state)) {
2508         hr = writer->state;
2509         goto error;
2510     }
2511 
2512     for(i = 0; i < shader->num_instrs; i++) {
2513         hr = call_instr_handler(writer, shader->instr[i], buffer);
2514         if(FAILED(hr)) {
2515             goto error;
2516         }
2517     }
2518 
2519     if(FAILED(writer->state)) {
2520         hr = writer->state;
2521         goto error;
2522     }
2523 
2524     writer->funcs->end(writer, shader, buffer);
2525 
2526     if(FAILED(buffer->state)) {
2527         hr = buffer->state;
2528         goto error;
2529     }
2530 
2531     *size = buffer->size * sizeof(DWORD);
2532     *result = buffer->data;
2533     buffer->data = NULL;
2534     hr = S_OK;
2535 
2536 error:
2537     if(buffer) {
2538         d3dcompiler_free(buffer->data);
2539         d3dcompiler_free(buffer);
2540     }
2541     d3dcompiler_free(writer);
2542     return hr;
2543 }
2544 
2545 void SlDeleteShader(struct bwriter_shader *shader) {
2546     unsigned int i, j;
2547 
2548     TRACE("Deleting shader %p\n", shader);
2549 
2550     for(i = 0; i < shader->num_cf; i++) {
2551         d3dcompiler_free(shader->constF[i]);
2552     }
2553     d3dcompiler_free(shader->constF);
2554     for(i = 0; i < shader->num_ci; i++) {
2555         d3dcompiler_free(shader->constI[i]);
2556     }
2557     d3dcompiler_free(shader->constI);
2558     for(i = 0; i < shader->num_cb; i++) {
2559         d3dcompiler_free(shader->constB[i]);
2560     }
2561     d3dcompiler_free(shader->constB);
2562 
2563     d3dcompiler_free(shader->inputs);
2564     d3dcompiler_free(shader->outputs);
2565     d3dcompiler_free(shader->samplers);
2566 
2567     for(i = 0; i < shader->num_instrs; i++) {
2568         for(j = 0; j < shader->instr[i]->num_srcs; j++) {
2569             d3dcompiler_free(shader->instr[i]->src[j].rel_reg);
2570         }
2571         d3dcompiler_free(shader->instr[i]->src);
2572         d3dcompiler_free(shader->instr[i]->dst.rel_reg);
2573         d3dcompiler_free(shader->instr[i]);
2574     }
2575     d3dcompiler_free(shader->instr);
2576 
2577     d3dcompiler_free(shader);
2578 }
2579