1 /*
2  * Direct3D bytecode output functions
3  *
4  * Copyright 2008 Stefan Dösinger
5  * Copyright 2009 Matteo Bruni
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20  *
21  */
22 
23 #include "d3dcompiler_private.h"
24 #include "d3d9types.h"
25 
26 WINE_DEFAULT_DEBUG_CHANNEL(bytecodewriter);
27 
28 /****************************************************************
29  * General assembler shader construction helper routines follow *
30  ****************************************************************/
31 /* struct instruction *alloc_instr
32  *
33  * Allocates a new instruction structure with srcs registers
34  *
35  * Parameters:
36  *  srcs: Number of source registers to allocate
37  *
38  * Returns:
39  *  A pointer to the allocated instruction structure
40  *  NULL in case of an allocation failure
41  */
42 struct instruction *alloc_instr(unsigned int srcs) {
43     struct instruction *ret = d3dcompiler_alloc(sizeof(*ret));
44     if(!ret) {
45         ERR("Failed to allocate memory for an instruction structure\n");
46         return NULL;
47     }
48 
49     if(srcs) {
50         ret->src = d3dcompiler_alloc(srcs * sizeof(*ret->src));
51         if(!ret->src) {
52             ERR("Failed to allocate memory for instruction registers\n");
53             d3dcompiler_free(ret);
54             return NULL;
55         }
56         ret->num_srcs = srcs;
57     }
58     return ret;
59 }
60 
61 /* void add_instruction
62  *
63  * Adds a new instruction to the shader's instructions array and grows the instruction array
64  * if needed.
65  *
66  * The function does NOT copy the instruction structure. Make sure not to release the
67  * instruction or any of its substructures like registers.
68  *
69  * Parameters:
70  *  shader: Shader to add the instruction to
71  *  instr: Instruction to add to the shader
72  */
73 BOOL add_instruction(struct bwriter_shader *shader, struct instruction *instr) {
74     struct instruction      **new_instructions;
75 
76     if(!shader) return FALSE;
77 
78     if(shader->instr_alloc_size == 0) {
79         shader->instr = d3dcompiler_alloc(sizeof(*shader->instr) * INSTRARRAY_INITIAL_SIZE);
80         if(!shader->instr) {
81             ERR("Failed to allocate the shader instruction array\n");
82             return FALSE;
83         }
84         shader->instr_alloc_size = INSTRARRAY_INITIAL_SIZE;
85     } else if(shader->instr_alloc_size == shader->num_instrs) {
86         new_instructions = d3dcompiler_realloc(shader->instr,
87                                        sizeof(*shader->instr) * (shader->instr_alloc_size) * 2);
88         if(!new_instructions) {
89             ERR("Failed to grow the shader instruction array\n");
90             return FALSE;
91         }
92         shader->instr = new_instructions;
93         shader->instr_alloc_size = shader->instr_alloc_size * 2;
94     } else if(shader->num_instrs > shader->instr_alloc_size) {
95         ERR("More instructions than allocated. This should not happen\n");
96         return FALSE;
97     }
98 
99     shader->instr[shader->num_instrs] = instr;
100     shader->num_instrs++;
101     return TRUE;
102 }
103 
104 BOOL add_constF(struct bwriter_shader *shader, DWORD reg, float x, float y, float z, float w) {
105     struct constant *newconst;
106 
107     if(shader->num_cf) {
108         struct constant **newarray;
109         newarray = d3dcompiler_realloc(shader->constF,
110                                sizeof(*shader->constF) * (shader->num_cf + 1));
111         if(!newarray) {
112             ERR("Failed to grow the constants array\n");
113             return FALSE;
114         }
115         shader->constF = newarray;
116     } else {
117         shader->constF = d3dcompiler_alloc(sizeof(*shader->constF));
118         if(!shader->constF) {
119             ERR("Failed to allocate the constants array\n");
120             return FALSE;
121         }
122     }
123 
124     newconst = d3dcompiler_alloc(sizeof(*newconst));
125     if(!newconst) {
126         ERR("Failed to allocate a new constant\n");
127         return FALSE;
128     }
129     newconst->regnum = reg;
130     newconst->value[0].f = x;
131     newconst->value[1].f = y;
132     newconst->value[2].f = z;
133     newconst->value[3].f = w;
134     shader->constF[shader->num_cf] = newconst;
135 
136     shader->num_cf++;
137     return TRUE;
138 }
139 
140 BOOL add_constI(struct bwriter_shader *shader, DWORD reg, INT x, INT y, INT z, INT w) {
141     struct constant *newconst;
142 
143     if(shader->num_ci) {
144         struct constant **newarray;
145         newarray = d3dcompiler_realloc(shader->constI,
146                                sizeof(*shader->constI) * (shader->num_ci + 1));
147         if(!newarray) {
148             ERR("Failed to grow the constants array\n");
149             return FALSE;
150         }
151         shader->constI = newarray;
152     } else {
153         shader->constI = d3dcompiler_alloc(sizeof(*shader->constI));
154         if(!shader->constI) {
155             ERR("Failed to allocate the constants array\n");
156             return FALSE;
157         }
158     }
159 
160     newconst = d3dcompiler_alloc(sizeof(*newconst));
161     if(!newconst) {
162         ERR("Failed to allocate a new constant\n");
163         return FALSE;
164     }
165     newconst->regnum = reg;
166     newconst->value[0].i = x;
167     newconst->value[1].i = y;
168     newconst->value[2].i = z;
169     newconst->value[3].i = w;
170     shader->constI[shader->num_ci] = newconst;
171 
172     shader->num_ci++;
173     return TRUE;
174 }
175 
176 BOOL add_constB(struct bwriter_shader *shader, DWORD reg, BOOL x) {
177     struct constant *newconst;
178 
179     if(shader->num_cb) {
180         struct constant **newarray;
181         newarray = d3dcompiler_realloc(shader->constB,
182                                sizeof(*shader->constB) * (shader->num_cb + 1));
183         if(!newarray) {
184             ERR("Failed to grow the constants array\n");
185             return FALSE;
186         }
187         shader->constB = newarray;
188     } else {
189         shader->constB = d3dcompiler_alloc(sizeof(*shader->constB));
190         if(!shader->constB) {
191             ERR("Failed to allocate the constants array\n");
192             return FALSE;
193         }
194     }
195 
196     newconst = d3dcompiler_alloc(sizeof(*newconst));
197     if(!newconst) {
198         ERR("Failed to allocate a new constant\n");
199         return FALSE;
200     }
201     newconst->regnum = reg;
202     newconst->value[0].b = x;
203     shader->constB[shader->num_cb] = newconst;
204 
205     shader->num_cb++;
206     return TRUE;
207 }
208 
209 BOOL record_declaration(struct bwriter_shader *shader, DWORD usage,
210                         DWORD usage_idx, DWORD mod, BOOL output,
211                         DWORD regnum, DWORD writemask, BOOL builtin) {
212     unsigned int *num;
213     struct declaration **decl;
214     unsigned int i;
215 
216     if(!shader) return FALSE;
217 
218     if(output) {
219         num = &shader->num_outputs;
220         decl = &shader->outputs;
221     } else {
222         num = &shader->num_inputs;
223         decl = &shader->inputs;
224     }
225 
226     if(*num == 0) {
227         *decl = d3dcompiler_alloc(sizeof(**decl));
228         if(!*decl) {
229             ERR("Error allocating declarations array\n");
230             return FALSE;
231         }
232     } else {
233         struct declaration *newdecl;
234         for(i = 0; i < *num; i++) {
235             if((*decl)[i].regnum == regnum && ((*decl)[i].writemask & writemask)) {
236                 WARN("Declaration of register %u already exists, writemask match 0x%x\n",
237                       regnum, (*decl)[i].writemask & writemask);
238             }
239         }
240 
241         newdecl = d3dcompiler_realloc(*decl,
242                               sizeof(**decl) * ((*num) + 1));
243         if(!newdecl) {
244             ERR("Error reallocating declarations array\n");
245             return FALSE;
246         }
247         *decl = newdecl;
248     }
249     (*decl)[*num].usage = usage;
250     (*decl)[*num].usage_idx = usage_idx;
251     (*decl)[*num].regnum = regnum;
252     (*decl)[*num].mod = mod;
253     (*decl)[*num].writemask = writemask;
254     (*decl)[*num].builtin = builtin;
255     (*num)++;
256 
257     return TRUE;
258 }
259 
260 BOOL record_sampler(struct bwriter_shader *shader, DWORD samptype, DWORD mod, DWORD regnum) {
261     unsigned int i;
262 
263     if(!shader) return FALSE;
264 
265     if(shader->num_samplers == 0) {
266         shader->samplers = d3dcompiler_alloc(sizeof(*shader->samplers));
267         if(!shader->samplers) {
268             ERR("Error allocating samplers array\n");
269             return FALSE;
270         }
271     } else {
272         struct samplerdecl *newarray;
273 
274         for(i = 0; i < shader->num_samplers; i++) {
275             if(shader->samplers[i].regnum == regnum) {
276                 WARN("Sampler %u already declared\n", regnum);
277                 /* This is not an error as far as the assembler is concerned.
278                  * Direct3D might refuse to load the compiled shader though
279                  */
280             }
281         }
282 
283         newarray = d3dcompiler_realloc(shader->samplers,
284                                sizeof(*shader->samplers) * (shader->num_samplers + 1));
285         if(!newarray) {
286             ERR("Error reallocating samplers array\n");
287             return FALSE;
288         }
289         shader->samplers = newarray;
290     }
291 
292     shader->samplers[shader->num_samplers].type = samptype;
293     shader->samplers[shader->num_samplers].mod = mod;
294     shader->samplers[shader->num_samplers].regnum = regnum;
295     shader->num_samplers++;
296     return TRUE;
297 }
298 
299 
300 /* shader bytecode buffer manipulation functions.
301  * allocate_buffer creates a new buffer structure, put_dword adds a new
302  * DWORD to the buffer. In the rare case of a memory allocation failure
303  * when trying to grow the buffer a flag is set in the buffer to mark it
304  * invalid. This avoids return value checking and passing in many places
305  */
306 static struct bytecode_buffer *allocate_buffer(void) {
307     struct bytecode_buffer *ret;
308 
309     ret = d3dcompiler_alloc(sizeof(*ret));
310     if(!ret) return NULL;
311 
312     ret->alloc_size = BYTECODEBUFFER_INITIAL_SIZE;
313     ret->data = d3dcompiler_alloc(sizeof(DWORD) * ret->alloc_size);
314     if(!ret->data) {
315         d3dcompiler_free(ret);
316         return NULL;
317     }
318     ret->state = S_OK;
319     return ret;
320 }
321 
322 static void put_dword(struct bytecode_buffer *buffer, DWORD value) {
323     if(FAILED(buffer->state)) return;
324 
325     if(buffer->alloc_size == buffer->size) {
326         DWORD *newarray;
327         buffer->alloc_size *= 2;
328         newarray = d3dcompiler_realloc(buffer->data,
329                                sizeof(DWORD) * buffer->alloc_size);
330         if(!newarray) {
331             ERR("Failed to grow the buffer data memory\n");
332             buffer->state = E_OUTOFMEMORY;
333             return;
334         }
335         buffer->data = newarray;
336     }
337     buffer->data[buffer->size++] = value;
338 }
339 
340 /* bwriter -> d3d9 conversion functions. */
341 static DWORD d3d9_swizzle(DWORD bwriter_swizzle)
342 {
343     /* Currently a NOP, but this allows changing the internal definitions
344      * without side effects. */
345     DWORD ret = 0;
346 
347     if ((bwriter_swizzle & BWRITERVS_X_X) == BWRITERVS_X_X) ret |= D3DVS_X_X;
348     if ((bwriter_swizzle & BWRITERVS_X_Y) == BWRITERVS_X_Y) ret |= D3DVS_X_Y;
349     if ((bwriter_swizzle & BWRITERVS_X_Z) == BWRITERVS_X_Z) ret |= D3DVS_X_Z;
350     if ((bwriter_swizzle & BWRITERVS_X_W) == BWRITERVS_X_W) ret |= D3DVS_X_W;
351 
352     if ((bwriter_swizzle & BWRITERVS_Y_X) == BWRITERVS_Y_X) ret |= D3DVS_Y_X;
353     if ((bwriter_swizzle & BWRITERVS_Y_Y) == BWRITERVS_Y_Y) ret |= D3DVS_Y_Y;
354     if ((bwriter_swizzle & BWRITERVS_Y_Z) == BWRITERVS_Y_Z) ret |= D3DVS_Y_Z;
355     if ((bwriter_swizzle & BWRITERVS_Y_W) == BWRITERVS_Y_W) ret |= D3DVS_Y_W;
356 
357     if ((bwriter_swizzle & BWRITERVS_Z_X) == BWRITERVS_Z_X) ret |= D3DVS_Z_X;
358     if ((bwriter_swizzle & BWRITERVS_Z_Y) == BWRITERVS_Z_Y) ret |= D3DVS_Z_Y;
359     if ((bwriter_swizzle & BWRITERVS_Z_Z) == BWRITERVS_Z_Z) ret |= D3DVS_Z_Z;
360     if ((bwriter_swizzle & BWRITERVS_Z_W) == BWRITERVS_Z_W) ret |= D3DVS_Z_W;
361 
362     if ((bwriter_swizzle & BWRITERVS_W_X) == BWRITERVS_W_X) ret |= D3DVS_W_X;
363     if ((bwriter_swizzle & BWRITERVS_W_Y) == BWRITERVS_W_Y) ret |= D3DVS_W_Y;
364     if ((bwriter_swizzle & BWRITERVS_W_Z) == BWRITERVS_W_Z) ret |= D3DVS_W_Z;
365     if ((bwriter_swizzle & BWRITERVS_W_W) == BWRITERVS_W_W) ret |= D3DVS_W_W;
366 
367     return ret;
368 }
369 
370 static DWORD d3d9_writemask(DWORD bwriter_writemask)
371 {
372     DWORD ret = 0;
373 
374     if (bwriter_writemask & BWRITERSP_WRITEMASK_0) ret |= D3DSP_WRITEMASK_0;
375     if (bwriter_writemask & BWRITERSP_WRITEMASK_1) ret |= D3DSP_WRITEMASK_1;
376     if (bwriter_writemask & BWRITERSP_WRITEMASK_2) ret |= D3DSP_WRITEMASK_2;
377     if (bwriter_writemask & BWRITERSP_WRITEMASK_3) ret |= D3DSP_WRITEMASK_3;
378 
379     return ret;
380 }
381 
382 static DWORD d3d9_srcmod(DWORD bwriter_srcmod)
383 {
384     switch (bwriter_srcmod)
385     {
386         case BWRITERSPSM_NONE:       return D3DSPSM_NONE;
387         case BWRITERSPSM_NEG:        return D3DSPSM_NEG;
388         case BWRITERSPSM_BIAS:       return D3DSPSM_BIAS;
389         case BWRITERSPSM_BIASNEG:    return D3DSPSM_BIASNEG;
390         case BWRITERSPSM_SIGN:       return D3DSPSM_SIGN;
391         case BWRITERSPSM_SIGNNEG:    return D3DSPSM_SIGNNEG;
392         case BWRITERSPSM_COMP:       return D3DSPSM_COMP;
393         case BWRITERSPSM_X2:         return D3DSPSM_X2;
394         case BWRITERSPSM_X2NEG:      return D3DSPSM_X2NEG;
395         case BWRITERSPSM_DZ:         return D3DSPSM_DZ;
396         case BWRITERSPSM_DW:         return D3DSPSM_DW;
397         case BWRITERSPSM_ABS:        return D3DSPSM_ABS;
398         case BWRITERSPSM_ABSNEG:     return D3DSPSM_ABSNEG;
399         case BWRITERSPSM_NOT:        return D3DSPSM_NOT;
400         default:
401             FIXME("Unhandled BWRITERSPSM token %#x.\n", bwriter_srcmod);
402             return 0;
403     }
404 }
405 
406 static DWORD d3d9_dstmod(DWORD bwriter_mod)
407 {
408     DWORD ret = 0;
409 
410     if (bwriter_mod & BWRITERSPDM_SATURATE)         ret |= D3DSPDM_SATURATE;
411     if (bwriter_mod & BWRITERSPDM_PARTIALPRECISION) ret |= D3DSPDM_PARTIALPRECISION;
412     if (bwriter_mod & BWRITERSPDM_MSAMPCENTROID)    ret |= D3DSPDM_MSAMPCENTROID;
413 
414     return ret;
415 }
416 
417 static DWORD d3d9_comparetype(DWORD asmshader_comparetype)
418 {
419     switch (asmshader_comparetype)
420     {
421         case BWRITER_COMPARISON_GT:     return D3DSPC_GT;
422         case BWRITER_COMPARISON_EQ:     return D3DSPC_EQ;
423         case BWRITER_COMPARISON_GE:     return D3DSPC_GE;
424         case BWRITER_COMPARISON_LT:     return D3DSPC_LT;
425         case BWRITER_COMPARISON_NE:     return D3DSPC_NE;
426         case BWRITER_COMPARISON_LE:     return D3DSPC_LE;
427         default:
428             FIXME("Unexpected BWRITER_COMPARISON type %#x.\n", asmshader_comparetype);
429             return 0;
430     }
431 }
432 
433 static DWORD d3d9_sampler(DWORD bwriter_sampler)
434 {
435     if (bwriter_sampler == BWRITERSTT_UNKNOWN)  return D3DSTT_UNKNOWN;
436     if (bwriter_sampler == BWRITERSTT_1D)       return D3DSTT_1D;
437     if (bwriter_sampler == BWRITERSTT_2D)       return D3DSTT_2D;
438     if (bwriter_sampler == BWRITERSTT_CUBE)     return D3DSTT_CUBE;
439     if (bwriter_sampler == BWRITERSTT_VOLUME)   return D3DSTT_VOLUME;
440     FIXME("Unexpected BWRITERSAMPLER_TEXTURE_TYPE type %#x.\n", bwriter_sampler);
441 
442     return 0;
443 }
444 
445 static DWORD d3d9_register(DWORD bwriter_register)
446 {
447     if (bwriter_register == BWRITERSPR_TEMP)        return D3DSPR_TEMP;
448     if (bwriter_register == BWRITERSPR_INPUT)       return D3DSPR_INPUT;
449     if (bwriter_register == BWRITERSPR_CONST)       return D3DSPR_CONST;
450     if (bwriter_register == BWRITERSPR_ADDR)        return D3DSPR_ADDR;
451     if (bwriter_register == BWRITERSPR_TEXTURE)     return D3DSPR_TEXTURE;
452     if (bwriter_register == BWRITERSPR_RASTOUT)     return D3DSPR_RASTOUT;
453     if (bwriter_register == BWRITERSPR_ATTROUT)     return D3DSPR_ATTROUT;
454     if (bwriter_register == BWRITERSPR_TEXCRDOUT)   return D3DSPR_TEXCRDOUT;
455     if (bwriter_register == BWRITERSPR_OUTPUT)      return D3DSPR_OUTPUT;
456     if (bwriter_register == BWRITERSPR_CONSTINT)    return D3DSPR_CONSTINT;
457     if (bwriter_register == BWRITERSPR_COLOROUT)    return D3DSPR_COLOROUT;
458     if (bwriter_register == BWRITERSPR_DEPTHOUT)    return D3DSPR_DEPTHOUT;
459     if (bwriter_register == BWRITERSPR_SAMPLER)     return D3DSPR_SAMPLER;
460     if (bwriter_register == BWRITERSPR_CONSTBOOL)   return D3DSPR_CONSTBOOL;
461     if (bwriter_register == BWRITERSPR_LOOP)        return D3DSPR_LOOP;
462     if (bwriter_register == BWRITERSPR_MISCTYPE)    return D3DSPR_MISCTYPE;
463     if (bwriter_register == BWRITERSPR_LABEL)       return D3DSPR_LABEL;
464     if (bwriter_register == BWRITERSPR_PREDICATE)   return D3DSPR_PREDICATE;
465 
466     FIXME("Unexpected BWRITERSPR %#x.\n", bwriter_register);
467     return ~0U;
468 }
469 
470 static DWORD d3d9_opcode(DWORD bwriter_opcode)
471 {
472     switch (bwriter_opcode)
473     {
474         case BWRITERSIO_NOP:         return D3DSIO_NOP;
475         case BWRITERSIO_MOV:         return D3DSIO_MOV;
476         case BWRITERSIO_ADD:         return D3DSIO_ADD;
477         case BWRITERSIO_SUB:         return D3DSIO_SUB;
478         case BWRITERSIO_MAD:         return D3DSIO_MAD;
479         case BWRITERSIO_MUL:         return D3DSIO_MUL;
480         case BWRITERSIO_RCP:         return D3DSIO_RCP;
481         case BWRITERSIO_RSQ:         return D3DSIO_RSQ;
482         case BWRITERSIO_DP3:         return D3DSIO_DP3;
483         case BWRITERSIO_DP4:         return D3DSIO_DP4;
484         case BWRITERSIO_MIN:         return D3DSIO_MIN;
485         case BWRITERSIO_MAX:         return D3DSIO_MAX;
486         case BWRITERSIO_SLT:         return D3DSIO_SLT;
487         case BWRITERSIO_SGE:         return D3DSIO_SGE;
488         case BWRITERSIO_EXP:         return D3DSIO_EXP;
489         case BWRITERSIO_LOG:         return D3DSIO_LOG;
490         case BWRITERSIO_LIT:         return D3DSIO_LIT;
491         case BWRITERSIO_DST:         return D3DSIO_DST;
492         case BWRITERSIO_LRP:         return D3DSIO_LRP;
493         case BWRITERSIO_FRC:         return D3DSIO_FRC;
494         case BWRITERSIO_M4x4:        return D3DSIO_M4x4;
495         case BWRITERSIO_M4x3:        return D3DSIO_M4x3;
496         case BWRITERSIO_M3x4:        return D3DSIO_M3x4;
497         case BWRITERSIO_M3x3:        return D3DSIO_M3x3;
498         case BWRITERSIO_M3x2:        return D3DSIO_M3x2;
499         case BWRITERSIO_CALL:        return D3DSIO_CALL;
500         case BWRITERSIO_CALLNZ:      return D3DSIO_CALLNZ;
501         case BWRITERSIO_LOOP:        return D3DSIO_LOOP;
502         case BWRITERSIO_RET:         return D3DSIO_RET;
503         case BWRITERSIO_ENDLOOP:     return D3DSIO_ENDLOOP;
504         case BWRITERSIO_LABEL:       return D3DSIO_LABEL;
505         case BWRITERSIO_DCL:         return D3DSIO_DCL;
506         case BWRITERSIO_POW:         return D3DSIO_POW;
507         case BWRITERSIO_CRS:         return D3DSIO_CRS;
508         case BWRITERSIO_SGN:         return D3DSIO_SGN;
509         case BWRITERSIO_ABS:         return D3DSIO_ABS;
510         case BWRITERSIO_NRM:         return D3DSIO_NRM;
511         case BWRITERSIO_SINCOS:      return D3DSIO_SINCOS;
512         case BWRITERSIO_REP:         return D3DSIO_REP;
513         case BWRITERSIO_ENDREP:      return D3DSIO_ENDREP;
514         case BWRITERSIO_IF:          return D3DSIO_IF;
515         case BWRITERSIO_IFC:         return D3DSIO_IFC;
516         case BWRITERSIO_ELSE:        return D3DSIO_ELSE;
517         case BWRITERSIO_ENDIF:       return D3DSIO_ENDIF;
518         case BWRITERSIO_BREAK:       return D3DSIO_BREAK;
519         case BWRITERSIO_BREAKC:      return D3DSIO_BREAKC;
520         case BWRITERSIO_MOVA:        return D3DSIO_MOVA;
521         case BWRITERSIO_DEFB:        return D3DSIO_DEFB;
522         case BWRITERSIO_DEFI:        return D3DSIO_DEFI;
523 
524         case BWRITERSIO_TEXCOORD:    return D3DSIO_TEXCOORD;
525         case BWRITERSIO_TEXKILL:     return D3DSIO_TEXKILL;
526         case BWRITERSIO_TEX:         return D3DSIO_TEX;
527         case BWRITERSIO_TEXBEM:      return D3DSIO_TEXBEM;
528         case BWRITERSIO_TEXBEML:     return D3DSIO_TEXBEML;
529         case BWRITERSIO_TEXREG2AR:   return D3DSIO_TEXREG2AR;
530         case BWRITERSIO_TEXREG2GB:   return D3DSIO_TEXREG2GB;
531         case BWRITERSIO_TEXM3x2PAD:  return D3DSIO_TEXM3x2PAD;
532         case BWRITERSIO_TEXM3x2TEX:  return D3DSIO_TEXM3x2TEX;
533         case BWRITERSIO_TEXM3x3PAD:  return D3DSIO_TEXM3x3PAD;
534         case BWRITERSIO_TEXM3x3TEX:  return D3DSIO_TEXM3x3TEX;
535         case BWRITERSIO_TEXM3x3SPEC: return D3DSIO_TEXM3x3SPEC;
536         case BWRITERSIO_TEXM3x3VSPEC:return D3DSIO_TEXM3x3VSPEC;
537         case BWRITERSIO_EXPP:        return D3DSIO_EXPP;
538         case BWRITERSIO_LOGP:        return D3DSIO_LOGP;
539         case BWRITERSIO_CND:         return D3DSIO_CND;
540         case BWRITERSIO_DEF:         return D3DSIO_DEF;
541         case BWRITERSIO_TEXREG2RGB:  return D3DSIO_TEXREG2RGB;
542         case BWRITERSIO_TEXDP3TEX:   return D3DSIO_TEXDP3TEX;
543         case BWRITERSIO_TEXM3x2DEPTH:return D3DSIO_TEXM3x2DEPTH;
544         case BWRITERSIO_TEXDP3:      return D3DSIO_TEXDP3;
545         case BWRITERSIO_TEXM3x3:     return D3DSIO_TEXM3x3;
546         case BWRITERSIO_TEXDEPTH:    return D3DSIO_TEXDEPTH;
547         case BWRITERSIO_CMP:         return D3DSIO_CMP;
548         case BWRITERSIO_BEM:         return D3DSIO_BEM;
549         case BWRITERSIO_DP2ADD:      return D3DSIO_DP2ADD;
550         case BWRITERSIO_DSX:         return D3DSIO_DSX;
551         case BWRITERSIO_DSY:         return D3DSIO_DSY;
552         case BWRITERSIO_TEXLDD:      return D3DSIO_TEXLDD;
553         case BWRITERSIO_SETP:        return D3DSIO_SETP;
554         case BWRITERSIO_TEXLDL:      return D3DSIO_TEXLDL;
555         case BWRITERSIO_BREAKP:      return D3DSIO_BREAKP;
556 
557         case BWRITERSIO_PHASE:       return D3DSIO_PHASE;
558         case BWRITERSIO_COMMENT:     return D3DSIO_COMMENT;
559         case BWRITERSIO_END:         return D3DSIO_END;
560 
561         case BWRITERSIO_TEXLDP:      return D3DSIO_TEX | D3DSI_TEXLD_PROJECT;
562         case BWRITERSIO_TEXLDB:      return D3DSIO_TEX | D3DSI_TEXLD_BIAS;
563 
564         default:
565             FIXME("Unhandled BWRITERSIO token %#x.\n", bwriter_opcode);
566             return ~0U;
567     }
568 }
569 
570 static DWORD d3dsp_register( D3DSHADER_PARAM_REGISTER_TYPE type, DWORD num )
571 {
572     return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
573            ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
574            (num & D3DSP_REGNUM_MASK); /* No shift */
575 }
576 
577 /******************************************************
578  * Implementation of the writer functions starts here *
579  ******************************************************/
580 static void write_declarations(struct bc_writer *This,
581                                struct bytecode_buffer *buffer, BOOL len,
582                                const struct declaration *decls, unsigned int num, DWORD type) {
583     DWORD i;
584     DWORD instr_dcl = D3DSIO_DCL;
585     DWORD token;
586     struct shader_reg reg;
587 
588     ZeroMemory(&reg, sizeof(reg));
589 
590     if(len) {
591         instr_dcl |= 2 << D3DSI_INSTLENGTH_SHIFT;
592     }
593 
594     for(i = 0; i < num; i++) {
595         if(decls[i].builtin) continue;
596 
597         /* Write the DCL instruction */
598         put_dword(buffer, instr_dcl);
599 
600         /* Write the usage and index */
601         token = (1u << 31); /* Bit 31 of non-instruction opcodes is 1 */
602         token |= (decls[i].usage << D3DSP_DCL_USAGE_SHIFT) & D3DSP_DCL_USAGE_MASK;
603         token |= (decls[i].usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT) & D3DSP_DCL_USAGEINDEX_MASK;
604         put_dword(buffer, token);
605 
606         /* Write the dest register */
607         reg.type = type;
608         reg.regnum = decls[i].regnum;
609         reg.u.writemask = decls[i].writemask;
610         This->funcs->dstreg(This, &reg, buffer, 0, decls[i].mod);
611     }
612 }
613 
614 static void write_const(struct constant **consts, int num, DWORD opcode, DWORD reg_type, struct bytecode_buffer *buffer, BOOL len) {
615     int i;
616     DWORD instr_def = opcode;
617     const DWORD reg = (1u << 31) | d3dsp_register( reg_type, 0 ) | D3DSP_WRITEMASK_ALL;
618 
619     if(len) {
620         if(opcode == D3DSIO_DEFB)
621             instr_def |= 2 << D3DSI_INSTLENGTH_SHIFT;
622         else
623             instr_def |= 5 << D3DSI_INSTLENGTH_SHIFT;
624     }
625 
626     for(i = 0; i < num; i++) {
627         /* Write the DEF instruction */
628         put_dword(buffer, instr_def);
629 
630         put_dword(buffer, reg | (consts[i]->regnum & D3DSP_REGNUM_MASK));
631         put_dword(buffer, consts[i]->value[0].d);
632         if(opcode != D3DSIO_DEFB) {
633             put_dword(buffer, consts[i]->value[1].d);
634             put_dword(buffer, consts[i]->value[2].d);
635             put_dword(buffer, consts[i]->value[3].d);
636         }
637     }
638 }
639 
640 static void write_constF(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
641     write_const(shader->constF, shader->num_cf, D3DSIO_DEF, D3DSPR_CONST, buffer, len);
642 }
643 
644 /* This function looks for VS 1/2 registers mapping to VS 3 output registers */
645 static HRESULT vs_find_builtin_varyings(struct bc_writer *This, const struct bwriter_shader *shader) {
646     DWORD i;
647     DWORD usage, usage_idx, writemask, regnum;
648 
649     for(i = 0; i < shader->num_outputs; i++) {
650         if(!shader->outputs[i].builtin) continue;
651 
652         usage = shader->outputs[i].usage;
653         usage_idx = shader->outputs[i].usage_idx;
654         writemask = shader->outputs[i].writemask;
655         regnum = shader->outputs[i].regnum;
656 
657         switch(usage) {
658             case BWRITERDECLUSAGE_POSITION:
659             case BWRITERDECLUSAGE_POSITIONT:
660                 if(usage_idx > 0) {
661                     WARN("dcl_position%u not supported in sm 1/2 shaders\n", usage_idx);
662                     return E_INVALIDARG;
663                 }
664                 TRACE("o%u is oPos\n", regnum);
665                 This->oPos_regnum = regnum;
666                 break;
667 
668             case BWRITERDECLUSAGE_COLOR:
669                 if(usage_idx > 1) {
670                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
671                     return E_INVALIDARG;
672                 }
673                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
674                     WARN("Only WRITEMASK_ALL is supported on color in sm 1/2\n");
675                     return E_INVALIDARG;
676                 }
677                 TRACE("o%u is oD%u\n", regnum, usage_idx);
678                 This->oD_regnum[usage_idx] = regnum;
679                 break;
680 
681             case BWRITERDECLUSAGE_TEXCOORD:
682                 if(usage_idx >= 8) {
683                     WARN("dcl_color%u not supported in sm 1/2 shaders\n", usage_idx);
684                     return E_INVALIDARG;
685                 }
686                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
687                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
688                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
689                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
690                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
691                     return E_INVALIDARG;
692                 }
693                 TRACE("o%u is oT%u\n", regnum, usage_idx);
694                 This->oT_regnum[usage_idx] = regnum;
695                 break;
696 
697             case BWRITERDECLUSAGE_PSIZE:
698                 if(usage_idx > 0) {
699                     WARN("dcl_psize%u not supported in sm 1/2 shaders\n", usage_idx);
700                     return E_INVALIDARG;
701                 }
702                 TRACE("o%u writemask 0x%08x is oPts\n", regnum, writemask);
703                 This->oPts_regnum = regnum;
704                 This->oPts_mask = writemask;
705                 break;
706 
707             case BWRITERDECLUSAGE_FOG:
708                 if(usage_idx > 0) {
709                     WARN("dcl_fog%u not supported in sm 1 shaders\n", usage_idx);
710                     return E_INVALIDARG;
711                 }
712                 if(writemask != BWRITERSP_WRITEMASK_0 && writemask != BWRITERSP_WRITEMASK_1 &&
713                    writemask != BWRITERSP_WRITEMASK_2 && writemask != BWRITERSP_WRITEMASK_3) {
714                     WARN("Unsupported fog writemask\n");
715                     return E_INVALIDARG;
716                 }
717                 TRACE("o%u writemask 0x%08x is oFog\n", regnum, writemask);
718                 This->oFog_regnum = regnum;
719                 This->oFog_mask = writemask;
720                 break;
721 
722             default:
723                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
724                 return E_INVALIDARG;
725         }
726     }
727 
728     return S_OK;
729 }
730 
731 static void vs_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
732     HRESULT hr;
733 
734     if(shader->num_ci || shader->num_cb) {
735         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
736         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
737         This->state = E_INVALIDARG;
738         return;
739     }
740 
741     hr = vs_find_builtin_varyings(This, shader);
742     if(FAILED(hr)) {
743         This->state = hr;
744         return;
745     }
746 
747     write_declarations(This, buffer, FALSE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
748     write_constF(shader, buffer, FALSE);
749 }
750 
751 static HRESULT find_ps_builtin_semantics(struct bc_writer *This,
752                                          const struct bwriter_shader *shader,
753                                          DWORD texcoords) {
754     DWORD i;
755     DWORD usage, usage_idx, writemask, regnum;
756 
757     This->v_regnum[0] = -1; This->v_regnum[1] = -1;
758     for(i = 0; i < 8; i++) This->t_regnum[i] = -1;
759 
760     for(i = 0; i < shader->num_inputs; i++) {
761         if(!shader->inputs[i].builtin) continue;
762 
763         usage = shader->inputs[i].usage;
764         usage_idx = shader->inputs[i].usage_idx;
765         writemask = shader->inputs[i].writemask;
766         regnum = shader->inputs[i].regnum;
767 
768         switch(usage) {
769             case BWRITERDECLUSAGE_COLOR:
770                 if(usage_idx > 1) {
771                     WARN("dcl_color%u not supported in sm 1 shaders\n", usage_idx);
772                     return E_INVALIDARG;
773                 }
774                 if(writemask != BWRITERSP_WRITEMASK_ALL) {
775                     WARN("Only WRITEMASK_ALL is supported on color in sm 1\n");
776                     return E_INVALIDARG;
777                 }
778                 TRACE("v%u is v%u\n", regnum, usage_idx);
779                 This->v_regnum[usage_idx] = regnum;
780                 break;
781 
782             case BWRITERDECLUSAGE_TEXCOORD:
783                 if(usage_idx > texcoords) {
784                     WARN("dcl_texcoord%u not supported in this shader version\n", usage_idx);
785                     return E_INVALIDARG;
786                 }
787                 if(writemask != (BWRITERSP_WRITEMASK_0) &&
788                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1) &&
789                    writemask != (BWRITERSP_WRITEMASK_0 | BWRITERSP_WRITEMASK_1 | BWRITERSP_WRITEMASK_2) &&
790                    writemask != (BWRITERSP_WRITEMASK_ALL)) {
791                     WARN("Partial writemasks not supported on texture coordinates in sm 1 and 2\n");
792                 } else {
793                     writemask = BWRITERSP_WRITEMASK_ALL;
794                 }
795                 TRACE("v%u is t%u\n", regnum, usage_idx);
796                 This->t_regnum[usage_idx] = regnum;
797                 break;
798 
799             default:
800                 WARN("Varying type %u is not supported in shader model 1.x\n", usage);
801                 return E_INVALIDARG;
802         }
803     }
804 
805     return S_OK;
806 }
807 
808 static void ps_1_x_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
809     HRESULT hr;
810 
811     /* First check the constants and varyings, and complain if unsupported things are used */
812     if(shader->num_ci || shader->num_cb) {
813         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
814         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
815         This->state = E_INVALIDARG;
816         return;
817     }
818 
819     hr = find_ps_builtin_semantics(This, shader, 4);
820     if(FAILED(hr)) {
821         This->state = hr;
822         return;
823     }
824 
825     write_constF(shader, buffer, FALSE);
826 }
827 
828 static void ps_1_4_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
829     HRESULT hr;
830 
831     /* First check the constants and varyings, and complain if unsupported things are used */
832     if(shader->num_ci || shader->num_cb) {
833         WARN("Int and bool constants are not supported in shader model 1 shaders\n");
834         WARN("Got %u int and %u boolean constants\n", shader->num_ci, shader->num_cb);
835         This->state = E_INVALIDARG;
836         return;
837     }
838     hr = find_ps_builtin_semantics(This, shader, 6);
839     if(FAILED(hr)) {
840         This->state = hr;
841         return;
842     }
843 
844     write_constF(shader, buffer, FALSE);
845 }
846 
847 static void end(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
848     put_dword(buffer, D3DSIO_END);
849 }
850 
851 static DWORD map_vs_output(struct bc_writer *This, DWORD regnum, DWORD mask, DWORD *has_components) {
852     DWORD i;
853 
854     *has_components = TRUE;
855     if(regnum == This->oPos_regnum) {
856         return d3dsp_register( D3DSPR_RASTOUT, D3DSRO_POSITION );
857     }
858     if(regnum == This->oFog_regnum && mask == This->oFog_mask) {
859         *has_components = FALSE;
860         return d3dsp_register( D3DSPR_RASTOUT, D3DSRO_FOG ) | D3DSP_WRITEMASK_ALL;
861     }
862     if(regnum == This->oPts_regnum && mask == This->oPts_mask) {
863         *has_components = FALSE;
864         return d3dsp_register( D3DSPR_RASTOUT, D3DSRO_POINT_SIZE ) | D3DSP_WRITEMASK_ALL;
865     }
866     for(i = 0; i < 2; i++) {
867         if(regnum == This->oD_regnum[i]) {
868             return d3dsp_register( D3DSPR_ATTROUT, i );
869         }
870     }
871     for(i = 0; i < 8; i++) {
872         if(regnum == This->oT_regnum[i]) {
873             return d3dsp_register( D3DSPR_TEXCRDOUT, i );
874         }
875     }
876 
877     /* The varying must be undeclared - if an unsupported varying was declared,
878      * the vs_find_builtin_varyings function would have caught it and this code
879      * would not run */
880     WARN("Undeclared varying %u\n", regnum);
881     This->state = E_INVALIDARG;
882     return -1;
883 }
884 
885 static void vs_12_dstreg(struct bc_writer *This, const struct shader_reg *reg,
886                          struct bytecode_buffer *buffer,
887                          DWORD shift, DWORD mod) {
888     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
889     DWORD has_wmask;
890 
891     if(reg->rel_reg) {
892         WARN("Relative addressing not supported for destination registers\n");
893         This->state = E_INVALIDARG;
894         return;
895     }
896 
897     switch(reg->type) {
898         case BWRITERSPR_OUTPUT:
899             token |= map_vs_output(This, reg->regnum, reg->u.writemask, &has_wmask);
900             break;
901 
902         case BWRITERSPR_RASTOUT:
903         case BWRITERSPR_ATTROUT:
904             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
905             * but are unexpected. If we hit this path it might be due to an error.
906             */
907             FIXME("Unexpected register type %u\n", reg->type);
908             /* drop through */
909         case BWRITERSPR_INPUT:
910         case BWRITERSPR_TEMP:
911         case BWRITERSPR_CONST:
912             token |= d3dsp_register( reg->type, reg->regnum );
913             has_wmask = TRUE;
914             break;
915 
916         case BWRITERSPR_ADDR:
917             if(reg->regnum != 0) {
918                 WARN("Only a0 exists\n");
919                 This->state = E_INVALIDARG;
920                 return;
921             }
922             token |= d3dsp_register( D3DSPR_ADDR, 0 );
923             has_wmask = TRUE;
924             break;
925 
926         case BWRITERSPR_PREDICATE:
927             if(This->version != BWRITERVS_VERSION(2, 1)){
928                 WARN("Predicate register is allowed only in vs_2_x\n");
929                 This->state = E_INVALIDARG;
930                 return;
931             }
932             if(reg->regnum != 0) {
933                 WARN("Only predicate register p0 exists\n");
934                 This->state = E_INVALIDARG;
935                 return;
936             }
937             token |= d3dsp_register( D3DSPR_PREDICATE, 0 );
938             has_wmask = TRUE;
939             break;
940 
941         default:
942             WARN("Invalid register type for 1.x-2.x vertex shader\n");
943             This->state = E_INVALIDARG;
944             return;
945     }
946 
947     /* strictly speaking there are no modifiers in vs_2_0 and vs_1_x, but they can be written
948      * into the bytecode and since the compiler doesn't do such checks write them
949      * (the checks are done by the undocumented shader validator)
950      */
951     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
952     token |= d3d9_dstmod(mod);
953 
954     if(has_wmask) {
955         token |= d3d9_writemask(reg->u.writemask);
956     }
957     put_dword(buffer, token);
958 }
959 
960 static void vs_1_x_srcreg(struct bc_writer *This, const struct shader_reg *reg,
961                           struct bytecode_buffer *buffer) {
962     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
963     DWORD has_swizzle;
964     DWORD component;
965 
966     switch(reg->type) {
967         case BWRITERSPR_OUTPUT:
968             /* Map the swizzle to a writemask, the format expected
969                by map_vs_output
970              */
971             switch(reg->u.swizzle) {
972                 case BWRITERVS_SWIZZLE_X:
973                     component = BWRITERSP_WRITEMASK_0;
974                     break;
975                 case BWRITERVS_SWIZZLE_Y:
976                     component = BWRITERSP_WRITEMASK_1;
977                     break;
978                 case BWRITERVS_SWIZZLE_Z:
979                     component = BWRITERSP_WRITEMASK_2;
980                     break;
981                 case BWRITERVS_SWIZZLE_W:
982                     component = BWRITERSP_WRITEMASK_3;
983                     break;
984                 default:
985                     component = 0;
986             }
987             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
988             break;
989 
990         case BWRITERSPR_RASTOUT:
991         case BWRITERSPR_ATTROUT:
992             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
993              * but are unexpected. If we hit this path it might be due to an error.
994              */
995             FIXME("Unexpected register type %u\n", reg->type);
996             /* drop through */
997         case BWRITERSPR_INPUT:
998         case BWRITERSPR_TEMP:
999         case BWRITERSPR_CONST:
1000         case BWRITERSPR_ADDR:
1001             token |= d3dsp_register( reg->type, reg->regnum );
1002             if(reg->rel_reg) {
1003                 if(reg->rel_reg->type != BWRITERSPR_ADDR ||
1004                    reg->rel_reg->regnum != 0 ||
1005                    reg->rel_reg->u.swizzle != BWRITERVS_SWIZZLE_X) {
1006                     WARN("Relative addressing in vs_1_x is only allowed with a0.x\n");
1007                     This->state = E_INVALIDARG;
1008                     return;
1009                 }
1010                 token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1011             }
1012             break;
1013 
1014         default:
1015             WARN("Invalid register type for 1.x vshader\n");
1016             This->state = E_INVALIDARG;
1017             return;
1018     }
1019 
1020     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1021 
1022     token |= d3d9_srcmod(reg->srcmod);
1023     put_dword(buffer, token);
1024 }
1025 
1026 static void write_srcregs(struct bc_writer *This, const struct instruction *instr,
1027                           struct bytecode_buffer *buffer){
1028     unsigned int i;
1029     if(instr->has_predicate){
1030         This->funcs->srcreg(This, &instr->predicate, buffer);
1031     }
1032     for(i = 0; i < instr->num_srcs; i++){
1033         This->funcs->srcreg(This, &instr->src[i], buffer);
1034     }
1035 }
1036 
1037 static DWORD map_ps13_temp(struct bc_writer *This, const struct shader_reg *reg) {
1038     if(reg->regnum == T0_REG) {
1039         return d3dsp_register( D3DSPR_TEXTURE, 0 );
1040     } else if(reg->regnum == T1_REG) {
1041         return d3dsp_register( D3DSPR_TEXTURE, 1 );
1042     } else if(reg->regnum == T2_REG) {
1043         return d3dsp_register( D3DSPR_TEXTURE, 2 );
1044     } else if(reg->regnum == T3_REG) {
1045         return d3dsp_register( D3DSPR_TEXTURE, 3 );
1046     } else {
1047         return d3dsp_register( D3DSPR_TEMP, reg->regnum );
1048     }
1049 }
1050 
1051 static DWORD map_ps_input(struct bc_writer *This,
1052                           const struct shader_reg *reg) {
1053     DWORD i;
1054     /* Map color interpolators */
1055     for(i = 0; i < 2; i++) {
1056         if(reg->regnum == This->v_regnum[i]) {
1057             return d3dsp_register( D3DSPR_INPUT, i );
1058         }
1059     }
1060     for(i = 0; i < 8; i++) {
1061         if(reg->regnum == This->t_regnum[i]) {
1062             return d3dsp_register( D3DSPR_TEXTURE, i );
1063         }
1064     }
1065 
1066     WARN("Invalid ps 1/2 varying\n");
1067     This->state = E_INVALIDARG;
1068     return 0;
1069 }
1070 
1071 static void ps_1_0123_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1072                              struct bytecode_buffer *buffer) {
1073     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1074     if(reg->rel_reg) {
1075         WARN("Relative addressing not supported in <= ps_3_0\n");
1076         This->state = E_INVALIDARG;
1077         return;
1078     }
1079 
1080     switch(reg->type) {
1081         case BWRITERSPR_INPUT:
1082             token |= map_ps_input(This, reg);
1083             break;
1084 
1085             /* Take care about the texture temporaries. There's a problem: They aren't
1086              * declared anywhere, so we can only hardcode the values that are used
1087              * to map ps_1_3 shaders to the common shader structure
1088              */
1089         case BWRITERSPR_TEMP:
1090             token |= map_ps13_temp(This, reg);
1091             break;
1092 
1093         case BWRITERSPR_CONST: /* Can be mapped 1:1 */
1094             token |= d3dsp_register( reg->type, reg->regnum );
1095             break;
1096 
1097         default:
1098             WARN("Invalid register type for <= ps_1_3 shader\n");
1099             This->state = E_INVALIDARG;
1100             return;
1101     }
1102 
1103     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1104 
1105     if(reg->srcmod == BWRITERSPSM_DZ || reg->srcmod == BWRITERSPSM_DW ||
1106        reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1107        reg->srcmod == BWRITERSPSM_NOT) {
1108         WARN("Invalid source modifier %u for <= ps_1_3\n", reg->srcmod);
1109         This->state = E_INVALIDARG;
1110         return;
1111     }
1112     token |= d3d9_srcmod(reg->srcmod);
1113     put_dword(buffer, token);
1114 }
1115 
1116 static void ps_1_0123_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1117                              struct bytecode_buffer *buffer,
1118                              DWORD shift, DWORD mod) {
1119     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1120 
1121     if(reg->rel_reg) {
1122         WARN("Relative addressing not supported for destination registers\n");
1123         This->state = E_INVALIDARG;
1124         return;
1125     }
1126 
1127     switch(reg->type) {
1128         case BWRITERSPR_TEMP:
1129             token |= map_ps13_temp(This, reg);
1130             break;
1131 
1132         /* texkill uses the input register as a destination parameter */
1133         case BWRITERSPR_INPUT:
1134             token |= map_ps_input(This, reg);
1135             break;
1136 
1137         default:
1138             WARN("Invalid dest register type for 1.x pshader\n");
1139             This->state = E_INVALIDARG;
1140             return;
1141     }
1142 
1143     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1144     token |= d3d9_dstmod(mod);
1145 
1146     token |= d3d9_writemask(reg->u.writemask);
1147     put_dword(buffer, token);
1148 }
1149 
1150 /* The length of an instruction consists of the destination register (if any),
1151  * the number of source registers, the number of address registers used for
1152  * indirect addressing, and optionally the predicate register
1153  */
1154 static DWORD instrlen(const struct instruction *instr, unsigned int srcs, unsigned int dsts) {
1155     unsigned int i;
1156     DWORD ret = srcs + dsts + (instr->has_predicate ? 1 : 0);
1157 
1158     if(dsts){
1159         if(instr->dst.rel_reg) ret++;
1160     }
1161     for(i = 0; i < srcs; i++) {
1162         if(instr->src[i].rel_reg) ret++;
1163     }
1164     return ret;
1165 }
1166 
1167 static void sm_1_x_opcode(struct bc_writer *This,
1168                           const struct instruction *instr,
1169                           DWORD token, struct bytecode_buffer *buffer) {
1170     /* In sm_1_x instruction length isn't encoded */
1171     if(instr->coissue){
1172         token |= D3DSI_COISSUE;
1173     }
1174     put_dword(buffer, token);
1175 }
1176 
1177 static void instr_handler(struct bc_writer *This,
1178                           const struct instruction *instr,
1179                           struct bytecode_buffer *buffer) {
1180     DWORD token = d3d9_opcode(instr->opcode);
1181 
1182     This->funcs->opcode(This, instr, token, buffer);
1183     if(instr->has_dst) This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1184     write_srcregs(This, instr, buffer);
1185 }
1186 
/* Instruction table of the vs_1_x backend. Each entry pairs a BWRITERSIO_*
 * opcode with the function that writes it; every opcode listed here is
 * written by the generic instr_handler. */
static const struct instr_handler_table vs_1_x_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_SLT,            instr_handler},
    {BWRITERSIO_SGE,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_DST,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_LIT,            instr_handler},

    {BWRITERSIO_END,            NULL}, /* Sentinel value, it signals
                                          the end of the list */
};
1218 
/* Backend for vs_1_x shaders: header writer, end writer, source register
 * writer, destination register writer, opcode writer and instruction table.
 * NOTE(review): the member order is inferred from the function signatures;
 * confirm against the declaration of struct bytecode_backend. */
static const struct bytecode_backend vs_1_x_backend = {
    vs_1_x_header,
    end,
    vs_1_x_srcreg,
    vs_12_dstreg,
    sm_1_x_opcode,
    vs_1_x_handlers
};
1227 
1228 static void instr_ps_1_0123_texld(struct bc_writer *This,
1229                                   const struct instruction *instr,
1230                                   struct bytecode_buffer *buffer) {
1231     DWORD idx;
1232     struct shader_reg reg;
1233     DWORD swizzlemask;
1234 
1235     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1236        instr->src[1].regnum > 3) {
1237         WARN("Unsupported sampler type %u regnum %u\n",
1238              instr->src[1].type, instr->src[1].regnum);
1239         This->state = E_INVALIDARG;
1240         return;
1241     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1242         WARN("Can only sample into a temp register\n");
1243         This->state = E_INVALIDARG;
1244         return;
1245     }
1246 
1247     idx = instr->src[1].regnum;
1248     if((idx == 0 && instr->dst.regnum != T0_REG) ||
1249        (idx == 1 && instr->dst.regnum != T1_REG) ||
1250        (idx == 2 && instr->dst.regnum != T2_REG) ||
1251        (idx == 3 && instr->dst.regnum != T3_REG)) {
1252         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_x\n",
1253              idx, instr->dst.regnum);
1254         This->state = E_INVALIDARG;
1255         return;
1256     }
1257     if(instr->src[0].type == BWRITERSPR_INPUT) {
1258         /* A simple non-dependent read tex instruction */
1259         if(instr->src[0].regnum != This->t_regnum[idx]) {
1260             WARN("Cannot sample from s%u with texture address data from interpolator %u\n",
1261                  idx, instr->src[0].regnum);
1262             This->state = E_INVALIDARG;
1263             return;
1264         }
1265         This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1266 
1267         /* map the temp dstreg to the ps_1_3 texture temporary register */
1268         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1269     } else if(instr->src[0].type == BWRITERSPR_TEMP) {
1270 
1271         swizzlemask = (3 << BWRITERVS_SWIZZLE_SHIFT) |
1272             (3 << (BWRITERVS_SWIZZLE_SHIFT + 2)) |
1273             (3 << (BWRITERVS_SWIZZLE_SHIFT + 4));
1274         if((instr->src[0].u.swizzle & swizzlemask) == (BWRITERVS_X_X | BWRITERVS_Y_Y | BWRITERVS_Z_Z)) {
1275             TRACE("writing texreg2rgb\n");
1276             This->funcs->opcode(This, instr, D3DSIO_TEXREG2RGB & D3DSI_OPCODE_MASK, buffer);
1277         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_W | BWRITERVS_Y_X | BWRITERVS_Z_X | BWRITERVS_W_X)) {
1278             TRACE("writing texreg2ar\n");
1279             This->funcs->opcode(This, instr, D3DSIO_TEXREG2AR & D3DSI_OPCODE_MASK, buffer);
1280         } else if(instr->src[0].u.swizzle == (BWRITERVS_X_Y | BWRITERVS_Y_Z | BWRITERVS_Z_Z | BWRITERVS_W_Z)) {
1281             TRACE("writing texreg2gb\n");
1282             This->funcs->opcode(This, instr, D3DSIO_TEXREG2GB & D3DSI_OPCODE_MASK, buffer);
1283         } else {
1284             WARN("Unsupported src addr swizzle in dependent texld: 0x%08x\n", instr->src[0].u.swizzle);
1285             This->state = E_INVALIDARG;
1286             return;
1287         }
1288 
1289         /* Dst and src reg can be mapped normally. Both registers are temporary registers in the
1290          * source shader and have to be mapped to the temporary form of the texture registers. However,
1291          * the src reg doesn't have a swizzle
1292          */
1293         This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1294         reg = instr->src[0];
1295         reg.u.swizzle = BWRITERVS_NOSWIZZLE;
1296         This->funcs->srcreg(This, &reg, buffer);
1297     } else {
1298         WARN("Invalid address data source register\n");
1299         This->state = E_INVALIDARG;
1300         return;
1301     }
1302 }
1303 
1304 static void instr_ps_1_0123_mov(struct bc_writer *This,
1305                                 const struct instruction *instr,
1306                                 struct bytecode_buffer *buffer) {
1307     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1308 
1309     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1310         if((instr->dst.regnum == T0_REG && instr->src[0].regnum == This->t_regnum[0]) ||
1311            (instr->dst.regnum == T1_REG && instr->src[0].regnum == This->t_regnum[1]) ||
1312            (instr->dst.regnum == T2_REG && instr->src[0].regnum == This->t_regnum[2]) ||
1313            (instr->dst.regnum == T3_REG && instr->src[0].regnum == This->t_regnum[3])) {
1314             if(instr->dstmod & BWRITERSPDM_SATURATE) {
1315                 This->funcs->opcode(This, instr, D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK, buffer);
1316                 /* Remove the SATURATE flag, it's implicit to the instruction */
1317                 This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod & (~BWRITERSPDM_SATURATE));
1318                 return;
1319             } else {
1320                 WARN("A varying -> temp copy is only supported with the SATURATE modifier in <=ps_1_3\n");
1321                 This->state = E_INVALIDARG;
1322                 return;
1323             }
1324         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1325                   instr->src[0].regnum == This->v_regnum[1]) {
1326             /* Handled by the normal mov below. Just drop out of the if condition */
1327         } else {
1328             WARN("Unsupported varying -> temp mov in <= ps_1_3\n");
1329             This->state = E_INVALIDARG;
1330             return;
1331         }
1332     }
1333 
1334     This->funcs->opcode(This, instr, token, buffer);
1335     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1336     This->funcs->srcreg(This, &instr->src[0], buffer);
1337 }
1338 
/* Instruction table of the ps_1_0 - ps_1_3 backend. Each entry pairs a
 * BWRITERSIO_* opcode with the function that writes it. MOV and TEX have
 * dedicated writers, everything else uses the generic instr_handler. */
static const struct instr_handler_table ps_1_0123_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_ps_1_0123_mov},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},

    /* pshader instructions */
    {BWRITERSIO_CND,            instr_handler},
    {BWRITERSIO_CMP,            instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},
    {BWRITERSIO_TEX,            instr_ps_1_0123_texld},
    {BWRITERSIO_TEXBEM,         instr_handler},
    {BWRITERSIO_TEXBEML,        instr_handler},
    {BWRITERSIO_TEXM3x2PAD,     instr_handler},
    {BWRITERSIO_TEXM3x3PAD,     instr_handler},
    {BWRITERSIO_TEXM3x3SPEC,    instr_handler},
    {BWRITERSIO_TEXM3x3VSPEC,   instr_handler},
    {BWRITERSIO_TEXM3x3TEX,     instr_handler},
    {BWRITERSIO_TEXM3x3,        instr_handler},
    {BWRITERSIO_TEXM3x2DEPTH,   instr_handler},
    {BWRITERSIO_TEXM3x2TEX,     instr_handler},
    {BWRITERSIO_TEXDP3,         instr_handler},
    {BWRITERSIO_TEXDP3TEX,      instr_handler},
    /* Sentinel entry marking the end of the list */
    {BWRITERSIO_END,            NULL},
};
1369 
/* Backend for ps_1_0 - ps_1_3 shaders: header writer, end writer, source
 * register writer, destination register writer, opcode writer and
 * instruction table.
 * NOTE(review): the member order is inferred from the function signatures;
 * confirm against the declaration of struct bytecode_backend. */
static const struct bytecode_backend ps_1_0123_backend = {
    ps_1_x_header,
    end,
    ps_1_0123_srcreg,
    ps_1_0123_dstreg,
    sm_1_x_opcode,
    ps_1_0123_handlers
};
1378 
1379 static void ps_1_4_srcreg(struct bc_writer *This, const struct shader_reg *reg,
1380                           struct bytecode_buffer *buffer) {
1381     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1382     if(reg->rel_reg) {
1383         WARN("Relative addressing not supported in <= ps_3_0\n");
1384         This->state = E_INVALIDARG;
1385         return;
1386     }
1387 
1388     switch(reg->type) {
1389         case BWRITERSPR_INPUT:
1390             token |= map_ps_input(This, reg);
1391             break;
1392 
1393         /* Can be mapped 1:1 */
1394         case BWRITERSPR_TEMP:
1395         case BWRITERSPR_CONST:
1396             token |= d3dsp_register( reg->type, reg->regnum );
1397             break;
1398 
1399         default:
1400             WARN("Invalid register type for ps_1_4 shader\n");
1401             This->state = E_INVALIDARG;
1402             return;
1403     }
1404 
1405     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1406 
1407     if(reg->srcmod == BWRITERSPSM_ABS || reg->srcmod == BWRITERSPSM_ABSNEG ||
1408        reg->srcmod == BWRITERSPSM_NOT) {
1409         WARN("Invalid source modifier %u for ps_1_4\n", reg->srcmod);
1410         This->state = E_INVALIDARG;
1411         return;
1412     }
1413     token |= d3d9_srcmod(reg->srcmod);
1414     put_dword(buffer, token);
1415 }
1416 
1417 static void ps_1_4_dstreg(struct bc_writer *This, const struct shader_reg *reg,
1418                           struct bytecode_buffer *buffer,
1419                           DWORD shift, DWORD mod) {
1420     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1421 
1422     if(reg->rel_reg) {
1423         WARN("Relative addressing not supported for destination registers\n");
1424         This->state = E_INVALIDARG;
1425         return;
1426     }
1427 
1428     switch(reg->type) {
1429         case BWRITERSPR_TEMP: /* 1:1 mapping */
1430             token |= d3dsp_register( reg->type, reg->regnum );
1431             break;
1432 
1433 	/* For texkill */
1434         case BWRITERSPR_INPUT:
1435             token |= map_ps_input(This, reg);
1436             break;
1437 
1438         default:
1439             WARN("Invalid dest register type for 1.x pshader\n");
1440             This->state = E_INVALIDARG;
1441             return;
1442     }
1443 
1444     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1445     token |= d3d9_dstmod(mod);
1446 
1447     token |= d3d9_writemask(reg->u.writemask);
1448     put_dword(buffer, token);
1449 }
1450 
1451 static void instr_ps_1_4_mov(struct bc_writer *This,
1452                              const struct instruction *instr,
1453                              struct bytecode_buffer *buffer) {
1454     DWORD token = D3DSIO_MOV & D3DSI_OPCODE_MASK;
1455 
1456     if(instr->dst.type == BWRITERSPR_TEMP && instr->src[0].type == BWRITERSPR_INPUT) {
1457         if(instr->src[0].regnum == This->t_regnum[0] ||
1458            instr->src[0].regnum == This->t_regnum[1] ||
1459            instr->src[0].regnum == This->t_regnum[2] ||
1460            instr->src[0].regnum == This->t_regnum[3] ||
1461            instr->src[0].regnum == This->t_regnum[4] ||
1462            instr->src[0].regnum == This->t_regnum[5]) {
1463             /* Similar to a regular mov, but a different opcode */
1464             token = D3DSIO_TEXCOORD & D3DSI_OPCODE_MASK;
1465         } else if(instr->src[0].regnum == This->v_regnum[0] ||
1466                   instr->src[0].regnum == This->v_regnum[1]) {
1467             /* Handled by the normal mov below. Just drop out of the if condition */
1468         } else {
1469             WARN("Unsupported varying -> temp mov in ps_1_4\n");
1470             This->state = E_INVALIDARG;
1471             return;
1472         }
1473     }
1474 
1475     This->funcs->opcode(This, instr, token, buffer);
1476     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1477     This->funcs->srcreg(This, &instr->src[0], buffer);
1478 }
1479 
1480 static void instr_ps_1_4_texld(struct bc_writer *This,
1481                                const struct instruction *instr,
1482                                struct bytecode_buffer *buffer) {
1483     if(instr->src[1].type != BWRITERSPR_SAMPLER ||
1484        instr->src[1].regnum > 5) {
1485         WARN("Unsupported sampler type %u regnum %u\n",
1486              instr->src[1].type, instr->src[1].regnum);
1487         This->state = E_INVALIDARG;
1488         return;
1489     } else if(instr->dst.type != BWRITERSPR_TEMP) {
1490         WARN("Can only sample into a temp register\n");
1491         This->state = E_INVALIDARG;
1492         return;
1493     }
1494 
1495     if(instr->src[1].regnum != instr->dst.regnum) {
1496         WARN("Sampling from sampler s%u to register r%u is not possible in ps_1_4\n",
1497              instr->src[1].regnum, instr->dst.regnum);
1498         This->state = E_INVALIDARG;
1499         return;
1500     }
1501 
1502     This->funcs->opcode(This, instr, D3DSIO_TEX & D3DSI_OPCODE_MASK, buffer);
1503     This->funcs->dstreg(This, &instr->dst, buffer, instr->shift, instr->dstmod);
1504     This->funcs->srcreg(This, &instr->src[0], buffer);
1505 }
1506 
/* Instruction table of the ps_1_4 backend. Each entry pairs a BWRITERSIO_*
 * opcode with the function that writes it. MOV and TEX have dedicated
 * writers, everything else uses the generic instr_handler. */
static const struct instr_handler_table ps_1_4_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_ps_1_4_mov},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},

    /* pshader instructions */
    {BWRITERSIO_CND,            instr_handler},
    {BWRITERSIO_CMP,            instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},
    {BWRITERSIO_TEX,            instr_ps_1_4_texld},
    {BWRITERSIO_TEXDEPTH,       instr_handler},
    {BWRITERSIO_BEM,            instr_handler},

    {BWRITERSIO_PHASE,          instr_handler},
    /* Sentinel entry marking the end of the list */
    {BWRITERSIO_END,            NULL},
};
1529 
/* Backend for ps_1_4: header and end writers, source and destination
 * register writers, opcode writer and the instruction handler table.
 */
static const struct bytecode_backend ps_1_4_backend = {
    ps_1_4_header,
    end,
    ps_1_4_srcreg,
    ps_1_4_dstreg,
    sm_1_x_opcode,
    ps_1_4_handlers
};
1538 
1539 static void write_constB(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1540     write_const(shader->constB, shader->num_cb, D3DSIO_DEFB, D3DSPR_CONSTBOOL, buffer, len);
1541 }
1542 
1543 static void write_constI(const struct bwriter_shader *shader, struct bytecode_buffer *buffer, BOOL len) {
1544     write_const(shader->constI, shader->num_ci, D3DSIO_DEFI, D3DSPR_CONSTINT, buffer, len);
1545 }
1546 
1547 static void vs_2_header(struct bc_writer *This,
1548                         const struct bwriter_shader *shader,
1549                         struct bytecode_buffer *buffer) {
1550     HRESULT hr;
1551 
1552     hr = vs_find_builtin_varyings(This, shader);
1553     if(FAILED(hr)) {
1554         This->state = hr;
1555         return;
1556     }
1557 
1558     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1559     write_constF(shader, buffer, TRUE);
1560     write_constB(shader, buffer, TRUE);
1561     write_constI(shader, buffer, TRUE);
1562 }
1563 
1564 static void vs_2_srcreg(struct bc_writer *This,
1565                         const struct shader_reg *reg,
1566                         struct bytecode_buffer *buffer) {
1567     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1568     DWORD has_swizzle;
1569     DWORD component;
1570     DWORD d3d9reg;
1571 
1572     switch(reg->type) {
1573         case BWRITERSPR_OUTPUT:
1574             /* Map the swizzle to a writemask, the format expected
1575                by map_vs_output
1576              */
1577             switch(reg->u.swizzle) {
1578                 case BWRITERVS_SWIZZLE_X:
1579                     component = BWRITERSP_WRITEMASK_0;
1580                     break;
1581                 case BWRITERVS_SWIZZLE_Y:
1582                     component = BWRITERSP_WRITEMASK_1;
1583                     break;
1584                 case BWRITERVS_SWIZZLE_Z:
1585                     component = BWRITERSP_WRITEMASK_2;
1586                     break;
1587                 case BWRITERVS_SWIZZLE_W:
1588                     component = BWRITERSP_WRITEMASK_3;
1589                     break;
1590                 default:
1591                     component = 0;
1592             }
1593             token |= map_vs_output(This, reg->regnum, component, &has_swizzle);
1594             break;
1595 
1596         case BWRITERSPR_RASTOUT:
1597         case BWRITERSPR_ATTROUT:
1598             /* These registers are mapped to input and output regs. They can be encoded in the bytecode,
1599              * but are unexpected. If we hit this path it might be due to an error.
1600              */
1601             FIXME("Unexpected register type %u\n", reg->type);
1602             /* drop through */
1603         case BWRITERSPR_INPUT:
1604         case BWRITERSPR_TEMP:
1605         case BWRITERSPR_CONST:
1606         case BWRITERSPR_ADDR:
1607         case BWRITERSPR_CONSTINT:
1608         case BWRITERSPR_CONSTBOOL:
1609         case BWRITERSPR_LABEL:
1610             d3d9reg = d3d9_register(reg->type);
1611             token |= d3dsp_register( d3d9reg, reg->regnum );
1612             break;
1613 
1614         case BWRITERSPR_LOOP:
1615             if(reg->regnum != 0) {
1616                 WARN("Only regnum 0 is supported for the loop index register in vs_2_0\n");
1617                 This->state = E_INVALIDARG;
1618                 return;
1619             }
1620             token |= d3dsp_register( D3DSPR_LOOP, 0 );
1621             break;
1622 
1623         case BWRITERSPR_PREDICATE:
1624             if(This->version != BWRITERVS_VERSION(2, 1)){
1625                 WARN("Predicate register is allowed only in vs_2_x\n");
1626                 This->state = E_INVALIDARG;
1627                 return;
1628             }
1629             if(reg->regnum > 0) {
1630                 WARN("Only predicate register 0 is supported\n");
1631                 This->state = E_INVALIDARG;
1632                 return;
1633             }
1634             token |= d3dsp_register( D3DSPR_PREDICATE, 0 );
1635             break;
1636 
1637         default:
1638             WARN("Invalid register type for 2.0 vshader\n");
1639             This->state = E_INVALIDARG;
1640             return;
1641     }
1642 
1643     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1644 
1645     token |= d3d9_srcmod(reg->srcmod);
1646 
1647     if(reg->rel_reg)
1648         token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
1649 
1650     put_dword(buffer, token);
1651 
1652     /* vs_2_0 and newer write the register containing the index explicitly in the
1653      * binary code
1654      */
1655     if(token & D3DVS_ADDRMODE_RELATIVE)
1656         vs_2_srcreg(This, reg->rel_reg, buffer);
1657 }
1658 
1659 static void sm_2_opcode(struct bc_writer *This,
1660                         const struct instruction *instr,
1661                         DWORD token, struct bytecode_buffer *buffer) {
1662     /* From sm 2 onwards instruction length is encoded in the opcode field */
1663     int dsts = instr->has_dst ? 1 : 0;
1664     token |= instrlen(instr, instr->num_srcs, dsts) << D3DSI_INSTLENGTH_SHIFT;
1665     if(instr->comptype)
1666         token |= (d3d9_comparetype(instr->comptype) << 16) & (0xf << 16);
1667     if(instr->has_predicate)
1668         token |= D3DSHADER_INSTRUCTION_PREDICATED;
1669     put_dword(buffer,token);
1670 }
1671 
/* Instruction handler table for vs_2_0.
 *
 * Every listed opcode is written by the generic instr_handler. The table
 * ends with a BWRITERSIO_END entry that has a NULL handler.
 */
static const struct instr_handler_table vs_2_0_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_SLT,            instr_handler},
    {BWRITERSIO_SGE,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_DST,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_SGN,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_LIT,            instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_MOVA,           instr_handler},

    /* Flow control */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_LOOP,           instr_handler},
    {BWRITERSIO_RET,            instr_handler},
    {BWRITERSIO_ENDLOOP,        instr_handler},

    {BWRITERSIO_END,            NULL},
};
1722 
/* Backend for vs_2_0: header and end writers, source and destination
 * register writers, opcode writer and the instruction handler table.
 */
static const struct bytecode_backend vs_2_0_backend = {
    vs_2_header,
    end,
    vs_2_srcreg,
    vs_12_dstreg,
    sm_2_opcode,
    vs_2_0_handlers
};
1731 
/* Instruction handler table for vs_2_x.
 *
 * Contains the vs_2_0 opcodes plus ifc, break, breakc, setp and breakp.
 * Every listed opcode is written by the generic instr_handler. The table
 * ends with a BWRITERSIO_END entry that has a NULL handler.
 */
static const struct instr_handler_table vs_2_x_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_SLT,            instr_handler},
    {BWRITERSIO_SGE,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_DST,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_SGN,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_LIT,            instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_MOVA,           instr_handler},

    /* Flow control */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_IFC,            instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_BREAK,          instr_handler},
    {BWRITERSIO_BREAKC,         instr_handler},
    {BWRITERSIO_LOOP,           instr_handler},
    {BWRITERSIO_RET,            instr_handler},
    {BWRITERSIO_ENDLOOP,        instr_handler},

    /* Predication */
    {BWRITERSIO_SETP,           instr_handler},
    {BWRITERSIO_BREAKP,         instr_handler},

    {BWRITERSIO_END,            NULL},
};
1788 
/* Backend for vs_2_x. Shares all writers with the vs_2_0 backend and
 * differs only in the instruction handler table.
 */
static const struct bytecode_backend vs_2_x_backend = {
    vs_2_header,
    end,
    vs_2_srcreg,
    vs_12_dstreg,
    sm_2_opcode,
    vs_2_x_handlers
};
1797 
1798 static void write_samplers(const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1799     DWORD i;
1800     DWORD instr_dcl = D3DSIO_DCL | (2 << D3DSI_INSTLENGTH_SHIFT);
1801     DWORD token;
1802     const DWORD reg = (1u << 31) | d3dsp_register( D3DSPR_SAMPLER, 0 ) | D3DSP_WRITEMASK_ALL;
1803 
1804     for(i = 0; i < shader->num_samplers; i++) {
1805         /* Write the DCL instruction */
1806         put_dword(buffer, instr_dcl);
1807         token = (1u << 31);
1808         /* Already shifted */
1809         token |= (d3d9_sampler(shader->samplers[i].type)) & D3DSP_TEXTURETYPE_MASK;
1810         put_dword(buffer, token);
1811         token = reg | (shader->samplers[i].regnum & D3DSP_REGNUM_MASK);
1812         token |= d3d9_dstmod(shader->samplers[i].mod);
1813         put_dword(buffer, token);
1814     }
1815 }
1816 
1817 static void ps_2_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
1818     HRESULT hr = find_ps_builtin_semantics(This, shader, 8);
1819     if(FAILED(hr)) {
1820         This->state = hr;
1821         return;
1822     }
1823 
1824     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
1825     write_samplers(shader, buffer);
1826     write_constF(shader, buffer, TRUE);
1827     write_constB(shader, buffer, TRUE);
1828     write_constI(shader, buffer, TRUE);
1829 }
1830 
1831 static void ps_2_srcreg(struct bc_writer *This,
1832                         const struct shader_reg *reg,
1833                         struct bytecode_buffer *buffer) {
1834     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1835     DWORD d3d9reg;
1836     if(reg->rel_reg) {
1837         WARN("Relative addressing not supported in <= ps_3_0\n");
1838         This->state = E_INVALIDARG;
1839         return;
1840     }
1841 
1842     switch(reg->type) {
1843         case BWRITERSPR_INPUT:
1844             token |= map_ps_input(This, reg);
1845             break;
1846 
1847             /* Can be mapped 1:1 */
1848         case BWRITERSPR_TEMP:
1849         case BWRITERSPR_CONST:
1850         case BWRITERSPR_COLOROUT:
1851         case BWRITERSPR_CONSTBOOL:
1852         case BWRITERSPR_CONSTINT:
1853         case BWRITERSPR_SAMPLER:
1854         case BWRITERSPR_LABEL:
1855         case BWRITERSPR_DEPTHOUT:
1856             d3d9reg = d3d9_register(reg->type);
1857             token |= d3dsp_register( d3d9reg, reg->regnum );
1858             break;
1859 
1860         case BWRITERSPR_PREDICATE:
1861             if(This->version != BWRITERPS_VERSION(2, 1)){
1862                 WARN("Predicate register not supported in ps_2_0\n");
1863                 This->state = E_INVALIDARG;
1864             }
1865             if(reg->regnum) {
1866                 WARN("Predicate register with regnum %u not supported\n",
1867                      reg->regnum);
1868                 This->state = E_INVALIDARG;
1869             }
1870             token |= d3dsp_register( D3DSPR_PREDICATE, 0 );
1871             break;
1872 
1873         default:
1874             WARN("Invalid register type for ps_2_0 shader\n");
1875             This->state = E_INVALIDARG;
1876             return;
1877     }
1878 
1879     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK; /* already shifted */
1880 
1881     token |= d3d9_srcmod(reg->srcmod);
1882     put_dword(buffer, token);
1883 }
1884 
1885 static void ps_2_0_dstreg(struct bc_writer *This,
1886                           const struct shader_reg *reg,
1887                           struct bytecode_buffer *buffer,
1888                           DWORD shift, DWORD mod) {
1889     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
1890     DWORD d3d9reg;
1891 
1892     if(reg->rel_reg) {
1893         WARN("Relative addressing not supported for destination registers\n");
1894         This->state = E_INVALIDARG;
1895         return;
1896     }
1897 
1898     switch(reg->type) {
1899         case BWRITERSPR_TEMP: /* 1:1 mapping */
1900         case BWRITERSPR_COLOROUT:
1901         case BWRITERSPR_DEPTHOUT:
1902             d3d9reg = d3d9_register(reg->type);
1903             token |= d3dsp_register( d3d9reg, reg->regnum );
1904             break;
1905 
1906         case BWRITERSPR_PREDICATE:
1907             if(This->version != BWRITERPS_VERSION(2, 1)){
1908                 WARN("Predicate register not supported in ps_2_0\n");
1909                 This->state = E_INVALIDARG;
1910             }
1911             token |= d3dsp_register( D3DSPR_PREDICATE, reg->regnum );
1912             break;
1913 
1914 	/* texkill uses the input register as a destination parameter */
1915         case BWRITERSPR_INPUT:
1916             token |= map_ps_input(This, reg);
1917             break;
1918 
1919         default:
1920             WARN("Invalid dest register type for 2.x pshader\n");
1921             This->state = E_INVALIDARG;
1922             return;
1923     }
1924 
1925     token |= (shift << D3DSP_DSTSHIFT_SHIFT) & D3DSP_DSTSHIFT_MASK;
1926     token |= d3d9_dstmod(mod);
1927 
1928     token |= d3d9_writemask(reg->u.writemask);
1929     put_dword(buffer, token);
1930 }
1931 
/* Instruction handler table for ps_2_0.
 *
 * Every listed opcode is written by the generic instr_handler. The table
 * ends with a BWRITERSIO_END entry that has a NULL handler.
 */
static const struct instr_handler_table ps_2_0_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_DP2ADD,         instr_handler},
    {BWRITERSIO_CMP,            instr_handler},

    /* Texture instructions */
    {BWRITERSIO_TEX,            instr_handler},
    {BWRITERSIO_TEXLDP,         instr_handler},
    {BWRITERSIO_TEXLDB,         instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},

    {BWRITERSIO_END,            NULL},
};
1971 
/* Backend for ps_2_0: header and end writers, source and destination
 * register writers, opcode writer and the instruction handler table.
 */
static const struct bytecode_backend ps_2_0_backend = {
    ps_2_header,
    end,
    ps_2_srcreg,
    ps_2_0_dstreg,
    sm_2_opcode,
    ps_2_0_handlers
};
1980 
/* Instruction handler table for ps_2_x.
 *
 * Contains the ps_2_0 opcodes plus flow control (call, callnz, rep, endrep,
 * if, label, ifc, else, endif, break, breakc, ret), dsx, dsy, setp, breakp
 * and texldd. Every listed opcode is written by the generic instr_handler.
 * The table ends with a BWRITERSIO_END entry that has a NULL handler.
 */
static const struct instr_handler_table ps_2_x_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_DP2ADD,         instr_handler},
    {BWRITERSIO_CMP,            instr_handler},

    /* Flow control */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_IFC,            instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_BREAK,          instr_handler},
    {BWRITERSIO_BREAKC,         instr_handler},
    {BWRITERSIO_RET,            instr_handler},

    /* Texture and derivative instructions */
    {BWRITERSIO_TEX,            instr_handler},
    {BWRITERSIO_TEXLDP,         instr_handler},
    {BWRITERSIO_TEXLDB,         instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},
    {BWRITERSIO_DSX,            instr_handler},
    {BWRITERSIO_DSY,            instr_handler},

    /* Predication */
    {BWRITERSIO_SETP,           instr_handler},
    {BWRITERSIO_BREAKP,         instr_handler},

    {BWRITERSIO_TEXLDD,         instr_handler},

    {BWRITERSIO_END,            NULL},
};
2040 
/* Backend for ps_2_x. Shares all writers with the ps_2_0 backend and
 * differs only in the instruction handler table.
 */
static const struct bytecode_backend ps_2_x_backend = {
    ps_2_header,
    end,
    ps_2_srcreg,
    ps_2_0_dstreg,
    sm_2_opcode,
    ps_2_x_handlers
};
2049 
2050 static void sm_3_header(struct bc_writer *This, const struct bwriter_shader *shader, struct bytecode_buffer *buffer) {
2051     write_declarations(This, buffer, TRUE, shader->inputs, shader->num_inputs, BWRITERSPR_INPUT);
2052     write_declarations(This, buffer, TRUE, shader->outputs, shader->num_outputs, BWRITERSPR_OUTPUT);
2053     write_constF(shader, buffer, TRUE);
2054     write_constB(shader, buffer, TRUE);
2055     write_constI(shader, buffer, TRUE);
2056     write_samplers(shader, buffer);
2057 }
2058 
2059 static void sm_3_srcreg(struct bc_writer *This,
2060                         const struct shader_reg *reg,
2061                         struct bytecode_buffer *buffer) {
2062     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
2063     DWORD d3d9reg;
2064 
2065     d3d9reg = d3d9_register(reg->type);
2066     token |= d3dsp_register( d3d9reg, reg->regnum );
2067     token |= d3d9_swizzle(reg->u.swizzle) & D3DVS_SWIZZLE_MASK;
2068     token |= d3d9_srcmod(reg->srcmod);
2069 
2070     if(reg->rel_reg) {
2071         if(reg->type == BWRITERSPR_CONST && This->version == BWRITERPS_VERSION(3, 0)) {
2072             WARN("c%u[...] is unsupported in ps_3_0\n", reg->regnum);
2073             This->state = E_INVALIDARG;
2074             return;
2075         }
2076         if(((reg->rel_reg->type == BWRITERSPR_ADDR && This->version == BWRITERVS_VERSION(3, 0)) ||
2077            reg->rel_reg->type == BWRITERSPR_LOOP) &&
2078            reg->rel_reg->regnum == 0) {
2079             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
2080         } else {
2081             WARN("Unsupported relative addressing register\n");
2082             This->state = E_INVALIDARG;
2083             return;
2084         }
2085     }
2086 
2087     put_dword(buffer, token);
2088 
2089     /* vs_2_0 and newer write the register containing the index explicitly in the
2090      * binary code
2091      */
2092     if(token & D3DVS_ADDRMODE_RELATIVE) {
2093         sm_3_srcreg(This, reg->rel_reg, buffer);
2094     }
2095 }
2096 
2097 static void sm_3_dstreg(struct bc_writer *This,
2098                         const struct shader_reg *reg,
2099                         struct bytecode_buffer *buffer,
2100                         DWORD shift, DWORD mod) {
2101     DWORD token = (1u << 31); /* Bit 31 of registers is 1 */
2102     DWORD d3d9reg;
2103 
2104     if(reg->rel_reg) {
2105         if(This->version == BWRITERVS_VERSION(3, 0) &&
2106            reg->type == BWRITERSPR_OUTPUT) {
2107             token |= D3DVS_ADDRMODE_RELATIVE & D3DVS_ADDRESSMODE_MASK;
2108         } else {
2109             WARN("Relative addressing not supported for this shader type or register type\n");
2110             This->state = E_INVALIDARG;
2111             return;
2112         }
2113     }
2114 
2115     d3d9reg = d3d9_register(reg->type);
2116     token |= d3dsp_register( d3d9reg, reg->regnum );
2117     token |= d3d9_dstmod(mod);
2118     token |= d3d9_writemask(reg->u.writemask);
2119     put_dword(buffer, token);
2120 
2121     /* vs_2_0 and newer write the register containing the index explicitly in the
2122      * binary code
2123      */
2124     if(token & D3DVS_ADDRMODE_RELATIVE) {
2125         sm_3_srcreg(This, reg->rel_reg, buffer);
2126     }
2127 }
2128 
/* Instruction handler table for vs_3_0.
 *
 * Contains the vs_2_x opcodes plus texldl. Every listed opcode is written
 * by the generic instr_handler. The table ends with a BWRITERSIO_END entry
 * that has a NULL handler.
 */
static const struct instr_handler_table vs_3_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_SLT,            instr_handler},
    {BWRITERSIO_SGE,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_DST,            instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_SGN,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_LIT,            instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_MOVA,           instr_handler},

    /* Flow control */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_IFC,            instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_BREAK,          instr_handler},
    {BWRITERSIO_BREAKC,         instr_handler},
    {BWRITERSIO_LOOP,           instr_handler},
    {BWRITERSIO_RET,            instr_handler},
    {BWRITERSIO_ENDLOOP,        instr_handler},

    {BWRITERSIO_SETP,           instr_handler},
    {BWRITERSIO_BREAKP,         instr_handler},
    {BWRITERSIO_TEXLDL,         instr_handler},

    {BWRITERSIO_END,            NULL},
};
2186 
/* Backend for vs_3_0: shader model 3 header and register writers, the
 * shader model 2 opcode writer and the vs_3 instruction handler table.
 */
static const struct bytecode_backend vs_3_backend = {
    sm_3_header,
    end,
    sm_3_srcreg,
    sm_3_dstreg,
    sm_2_opcode,
    vs_3_handlers
};
2195 
/* Instruction handler table for ps_3_0.
 *
 * Contains the ps_2_x opcodes plus loop, endloop and texldl. Every listed
 * opcode is written by the generic instr_handler. The table ends with a
 * BWRITERSIO_END entry that has a NULL handler.
 */
static const struct instr_handler_table ps_3_handlers[] = {
    {BWRITERSIO_ADD,            instr_handler},
    {BWRITERSIO_NOP,            instr_handler},
    {BWRITERSIO_MOV,            instr_handler},
    {BWRITERSIO_SUB,            instr_handler},
    {BWRITERSIO_MAD,            instr_handler},
    {BWRITERSIO_MUL,            instr_handler},
    {BWRITERSIO_RCP,            instr_handler},
    {BWRITERSIO_RSQ,            instr_handler},
    {BWRITERSIO_DP3,            instr_handler},
    {BWRITERSIO_DP4,            instr_handler},
    {BWRITERSIO_MIN,            instr_handler},
    {BWRITERSIO_MAX,            instr_handler},
    {BWRITERSIO_ABS,            instr_handler},
    {BWRITERSIO_EXP,            instr_handler},
    {BWRITERSIO_LOG,            instr_handler},
    {BWRITERSIO_EXPP,           instr_handler},
    {BWRITERSIO_LOGP,           instr_handler},
    {BWRITERSIO_LRP,            instr_handler},
    {BWRITERSIO_FRC,            instr_handler},
    {BWRITERSIO_CRS,            instr_handler},
    {BWRITERSIO_NRM,            instr_handler},
    {BWRITERSIO_SINCOS,         instr_handler},
    {BWRITERSIO_M4x4,           instr_handler},
    {BWRITERSIO_M4x3,           instr_handler},
    {BWRITERSIO_M3x4,           instr_handler},
    {BWRITERSIO_M3x3,           instr_handler},
    {BWRITERSIO_M3x2,           instr_handler},
    {BWRITERSIO_POW,            instr_handler},
    {BWRITERSIO_DP2ADD,         instr_handler},
    {BWRITERSIO_CMP,            instr_handler},

    /* Flow control */
    {BWRITERSIO_CALL,           instr_handler},
    {BWRITERSIO_CALLNZ,         instr_handler},
    {BWRITERSIO_REP,            instr_handler},
    {BWRITERSIO_ENDREP,         instr_handler},
    {BWRITERSIO_IF,             instr_handler},
    {BWRITERSIO_LABEL,          instr_handler},
    {BWRITERSIO_IFC,            instr_handler},
    {BWRITERSIO_ELSE,           instr_handler},
    {BWRITERSIO_ENDIF,          instr_handler},
    {BWRITERSIO_BREAK,          instr_handler},
    {BWRITERSIO_BREAKC,         instr_handler},
    {BWRITERSIO_LOOP,           instr_handler},
    {BWRITERSIO_RET,            instr_handler},
    {BWRITERSIO_ENDLOOP,        instr_handler},

    {BWRITERSIO_SETP,           instr_handler},
    {BWRITERSIO_BREAKP,         instr_handler},
    {BWRITERSIO_TEXLDL,         instr_handler},

    /* Texture and derivative instructions */
    {BWRITERSIO_TEX,            instr_handler},
    {BWRITERSIO_TEXLDP,         instr_handler},
    {BWRITERSIO_TEXLDB,         instr_handler},
    {BWRITERSIO_TEXKILL,        instr_handler},
    {BWRITERSIO_DSX,            instr_handler},
    {BWRITERSIO_DSY,            instr_handler},
    {BWRITERSIO_TEXLDD,         instr_handler},

    {BWRITERSIO_END,            NULL},
};
2257 
/* Backend for ps_3_0. Shares all writers with the vs_3 backend and differs
 * only in the instruction handler table.
 */
static const struct bytecode_backend ps_3_backend = {
    sm_3_header,
    end,
    sm_3_srcreg,
    sm_3_dstreg,
    sm_2_opcode,
    ps_3_handlers
};
2266 
2267 static void init_vs10_dx9_writer(struct bc_writer *writer) {
2268     TRACE("Creating DirectX9 vertex shader 1.0 writer\n");
2269     writer->funcs = &vs_1_x_backend;
2270 }
2271 
2272 static void init_vs11_dx9_writer(struct bc_writer *writer) {
2273     TRACE("Creating DirectX9 vertex shader 1.1 writer\n");
2274     writer->funcs = &vs_1_x_backend;
2275 }
2276 
2277 static void init_vs20_dx9_writer(struct bc_writer *writer) {
2278     TRACE("Creating DirectX9 vertex shader 2.0 writer\n");
2279     writer->funcs = &vs_2_0_backend;
2280 }
2281 
2282 static void init_vs2x_dx9_writer(struct bc_writer *writer) {
2283     TRACE("Creating DirectX9 vertex shader 2.x writer\n");
2284     writer->funcs = &vs_2_x_backend;
2285 }
2286 
2287 static void init_vs30_dx9_writer(struct bc_writer *writer) {
2288     TRACE("Creating DirectX9 vertex shader 3.0 writer\n");
2289     writer->funcs = &vs_3_backend;
2290 }
2291 
2292 static void init_ps10_dx9_writer(struct bc_writer *writer) {
2293     TRACE("Creating DirectX9 pixel shader 1.0 writer\n");
2294     writer->funcs = &ps_1_0123_backend;
2295 }
2296 
2297 static void init_ps11_dx9_writer(struct bc_writer *writer) {
2298     TRACE("Creating DirectX9 pixel shader 1.1 writer\n");
2299     writer->funcs = &ps_1_0123_backend;
2300 }
2301 
2302 static void init_ps12_dx9_writer(struct bc_writer *writer) {
2303     TRACE("Creating DirectX9 pixel shader 1.2 writer\n");
2304     writer->funcs = &ps_1_0123_backend;
2305 }
2306 
2307 static void init_ps13_dx9_writer(struct bc_writer *writer) {
2308     TRACE("Creating DirectX9 pixel shader 1.3 writer\n");
2309     writer->funcs = &ps_1_0123_backend;
2310 }
2311 
2312 static void init_ps14_dx9_writer(struct bc_writer *writer) {
2313     TRACE("Creating DirectX9 pixel shader 1.4 writer\n");
2314     writer->funcs = &ps_1_4_backend;
2315 }
2316 
2317 static void init_ps20_dx9_writer(struct bc_writer *writer) {
2318     TRACE("Creating DirectX9 pixel shader 2.0 writer\n");
2319     writer->funcs = &ps_2_0_backend;
2320 }
2321 
2322 static void init_ps2x_dx9_writer(struct bc_writer *writer) {
2323     TRACE("Creating DirectX9 pixel shader 2.x writer\n");
2324     writer->funcs = &ps_2_x_backend;
2325 }
2326 
2327 static void init_ps30_dx9_writer(struct bc_writer *writer) {
2328     TRACE("Creating DirectX9 pixel shader 3.0 writer\n");
2329     writer->funcs = &ps_3_backend;
2330 }
2331 
2332 static struct bc_writer *create_writer(DWORD version, DWORD dxversion) {
2333     struct bc_writer *ret = d3dcompiler_alloc(sizeof(*ret));
2334 
2335     if(!ret) {
2336         WARN("Failed to allocate a bytecode writer instance\n");
2337         return NULL;
2338     }
2339 
2340     switch(version) {
2341         case BWRITERVS_VERSION(1, 0):
2342             if(dxversion != 9) {
2343                 WARN("Unsupported dxversion for vertex shader 1.0 requested: %u\n", dxversion);
2344                 goto fail;
2345             }
2346             init_vs10_dx9_writer(ret);
2347             break;
2348         case BWRITERVS_VERSION(1, 1):
2349             if(dxversion != 9) {
2350                 WARN("Unsupported dxversion for vertex shader 1.1 requested: %u\n", dxversion);
2351                 goto fail;
2352             }
2353             init_vs11_dx9_writer(ret);
2354             break;
2355         case BWRITERVS_VERSION(2, 0):
2356             if(dxversion != 9) {
2357                 WARN("Unsupported dxversion for vertex shader 2.0 requested: %u\n", dxversion);
2358                 goto fail;
2359             }
2360             init_vs20_dx9_writer(ret);
2361             break;
2362         case BWRITERVS_VERSION(2, 1):
2363             if(dxversion != 9) {
2364                 WARN("Unsupported dxversion for vertex shader 2.x requested: %u\n", dxversion);
2365                 goto fail;
2366             }
2367             init_vs2x_dx9_writer(ret);
2368             break;
2369         case BWRITERVS_VERSION(3, 0):
2370             if(dxversion != 9) {
2371                 WARN("Unsupported dxversion for vertex shader 3.0 requested: %u\n", dxversion);
2372                 goto fail;
2373             }
2374             init_vs30_dx9_writer(ret);
2375             break;
2376 
2377         case BWRITERPS_VERSION(1, 0):
2378             if(dxversion != 9) {
2379                 WARN("Unsupported dxversion for pixel shader 1.0 requested: %u\n", dxversion);
2380                 goto fail;
2381             }
2382             init_ps10_dx9_writer(ret);
2383             break;
2384         case BWRITERPS_VERSION(1, 1):
2385             if(dxversion != 9) {
2386                 WARN("Unsupported dxversion for pixel shader 1.1 requested: %u\n", dxversion);
2387                 goto fail;
2388             }
2389             init_ps11_dx9_writer(ret);
2390             break;
2391         case BWRITERPS_VERSION(1, 2):
2392             if(dxversion != 9) {
2393                 WARN("Unsupported dxversion for pixel shader 1.2 requested: %u\n", dxversion);
2394                 goto fail;
2395             }
2396             init_ps12_dx9_writer(ret);
2397             break;
2398         case BWRITERPS_VERSION(1, 3):
2399             if(dxversion != 9) {
2400                 WARN("Unsupported dxversion for pixel shader 1.3 requested: %u\n", dxversion);
2401                 goto fail;
2402             }
2403             init_ps13_dx9_writer(ret);
2404             break;
2405         case BWRITERPS_VERSION(1, 4):
2406             if(dxversion != 9) {
2407                 WARN("Unsupported dxversion for pixel shader 1.4 requested: %u\n", dxversion);
2408                 goto fail;
2409             }
2410             init_ps14_dx9_writer(ret);
2411             break;
2412 
2413         case BWRITERPS_VERSION(2, 0):
2414             if(dxversion != 9) {
2415                 WARN("Unsupported dxversion for pixel shader 2.0 requested: %u\n", dxversion);
2416                 goto fail;
2417             }
2418             init_ps20_dx9_writer(ret);
2419             break;
2420 
2421         case BWRITERPS_VERSION(2, 1):
2422             if(dxversion != 9) {
2423                 WARN("Unsupported dxversion for pixel shader 2.x requested: %u\n", dxversion);
2424                 goto fail;
2425             }
2426             init_ps2x_dx9_writer(ret);
2427             break;
2428 
2429         case BWRITERPS_VERSION(3, 0):
2430             if(dxversion != 9) {
2431                 WARN("Unsupported dxversion for pixel shader 3.0 requested: %u\n", dxversion);
2432                 goto fail;
2433             }
2434             init_ps30_dx9_writer(ret);
2435             break;
2436 
2437         default:
2438             WARN("Unexpected shader version requested: %08x\n", version);
2439             goto fail;
2440     }
2441     ret->version = version;
2442     return ret;
2443 
2444 fail:
2445     d3dcompiler_free(ret);
2446     return NULL;
2447 }
2448 
2449 static HRESULT call_instr_handler(struct bc_writer *writer,
2450                                   const struct instruction *instr,
2451                                   struct bytecode_buffer *buffer) {
2452     DWORD i=0;
2453 
2454     while(writer->funcs->instructions[i].opcode != BWRITERSIO_END) {
2455         if(instr->opcode == writer->funcs->instructions[i].opcode) {
2456             if(!writer->funcs->instructions[i].func) {
2457                 WARN("Opcode %u not supported by this profile\n", instr->opcode);
2458                 return E_INVALIDARG;
2459             }
2460             writer->funcs->instructions[i].func(writer, instr, buffer);
2461             return S_OK;
2462         }
2463         i++;
2464     }
2465 
2466     FIXME("Unhandled instruction %u - %s\n", instr->opcode,
2467           debug_print_opcode(instr->opcode));
2468     return E_INVALIDARG;
2469 }
2470 
2471 HRESULT SlWriteBytecode(const struct bwriter_shader *shader, int dxversion, DWORD **result, DWORD *size)
2472 {
2473     struct bc_writer *writer;
2474     struct bytecode_buffer *buffer = NULL;
2475     HRESULT hr;
2476     unsigned int i;
2477 
2478     if(!shader){
2479         ERR("NULL shader structure, aborting\n");
2480         return E_FAIL;
2481     }
2482     writer = create_writer(shader->version, dxversion);
2483     *result = NULL;
2484 
2485     if(!writer) {
2486         WARN("Could not create a bytecode writer instance. Either unsupported version\n");
2487         WARN("or out of memory\n");
2488         hr = E_FAIL;
2489         goto error;
2490     }
2491 
2492     buffer = allocate_buffer();
2493     if(!buffer) {
2494         WARN("Failed to allocate a buffer for the shader bytecode\n");
2495         hr = E_FAIL;
2496         goto error;
2497     }
2498 
2499     /* Write shader type and version */
2500     put_dword(buffer, shader->version);
2501 
2502     writer->funcs->header(writer, shader, buffer);
2503     if(FAILED(writer->state)) {
2504         hr = writer->state;
2505         goto error;
2506     }
2507 
2508     for(i = 0; i < shader->num_instrs; i++) {
2509         hr = call_instr_handler(writer, shader->instr[i], buffer);
2510         if(FAILED(hr)) {
2511             goto error;
2512         }
2513     }
2514 
2515     if(FAILED(writer->state)) {
2516         hr = writer->state;
2517         goto error;
2518     }
2519 
2520     writer->funcs->end(writer, shader, buffer);
2521 
2522     if(FAILED(buffer->state)) {
2523         hr = buffer->state;
2524         goto error;
2525     }
2526 
2527     *size = buffer->size * sizeof(DWORD);
2528     *result = buffer->data;
2529     buffer->data = NULL;
2530     hr = S_OK;
2531 
2532 error:
2533     if(buffer) {
2534         d3dcompiler_free(buffer->data);
2535         d3dcompiler_free(buffer);
2536     }
2537     d3dcompiler_free(writer);
2538     return hr;
2539 }
2540 
2541 void SlDeleteShader(struct bwriter_shader *shader) {
2542     unsigned int i, j;
2543 
2544     TRACE("Deleting shader %p\n", shader);
2545 
2546     for(i = 0; i < shader->num_cf; i++) {
2547         d3dcompiler_free(shader->constF[i]);
2548     }
2549     d3dcompiler_free(shader->constF);
2550     for(i = 0; i < shader->num_ci; i++) {
2551         d3dcompiler_free(shader->constI[i]);
2552     }
2553     d3dcompiler_free(shader->constI);
2554     for(i = 0; i < shader->num_cb; i++) {
2555         d3dcompiler_free(shader->constB[i]);
2556     }
2557     d3dcompiler_free(shader->constB);
2558 
2559     d3dcompiler_free(shader->inputs);
2560     d3dcompiler_free(shader->outputs);
2561     d3dcompiler_free(shader->samplers);
2562 
2563     for(i = 0; i < shader->num_instrs; i++) {
2564         for(j = 0; j < shader->instr[i]->num_srcs; j++) {
2565             d3dcompiler_free(shader->instr[i]->src[j].rel_reg);
2566         }
2567         d3dcompiler_free(shader->instr[i]->src);
2568         d3dcompiler_free(shader->instr[i]->dst.rel_reg);
2569         d3dcompiler_free(shader->instr[i]);
2570     }
2571     d3dcompiler_free(shader->instr);
2572 
2573     d3dcompiler_free(shader);
2574 }
2575