xref: /reactos/dll/directx/wine/d3dx9_36/preshader.c (revision 6760065e)
1 #ifdef __REACTOS__
2 #include "precomp.h"
3 #else
4 /*
5  * Copyright 2016 Paul Gofman
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20  */
21 
22 #include "config.h"
23 #include "wine/port.h"
24 
25 #include "d3dx9_private.h"
26 
27 #include <float.h>
28 #include <assert.h>
29 #endif /* __REACTOS__ */
30 
/* Selects "d3dx" as the default Wine debug channel for this file. */
31 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
32 
33 #ifdef __REACTOS__
34 /* ReactOS FIXME: on ReactOS builds fmin()/fmax(), which pres_min()/pres_max() call,
    * are redirected to the min/max macros (the original note here read only "Insect"). */
35 #define fmin min
36 #define fmax max
37 #endif
38 
/* Preshader operation identifiers.
 * The enumerators are listed in the same order as the rows of pres_op_info[] (each row
 * there is annotated with its enumerator); parse_pres_ins() stores the index of the
 * matching pres_op_info[] row in d3dx_pres_ins.op. */
39 enum pres_ops
40 {
41     PRESHADER_OP_NOP,
42     PRESHADER_OP_MOV,
43     PRESHADER_OP_NEG,
44     PRESHADER_OP_RCP,
45     PRESHADER_OP_FRC,
46     PRESHADER_OP_EXP,
47     PRESHADER_OP_LOG,
48     PRESHADER_OP_RSQ,
49     PRESHADER_OP_SIN,
50     PRESHADER_OP_COS,
51     PRESHADER_OP_ASIN,
52     PRESHADER_OP_ACOS,
53     PRESHADER_OP_ATAN,
54     PRESHADER_OP_MIN,
55     PRESHADER_OP_MAX,
56     PRESHADER_OP_LT,
57     PRESHADER_OP_GE,
58     PRESHADER_OP_ADD,
59     PRESHADER_OP_MUL,
60     PRESHADER_OP_ATAN2,
61     PRESHADER_OP_DIV,
62     PRESHADER_OP_CMP,
63     PRESHADER_OP_DOT,
64     PRESHADER_OP_DOTSWIZ6,
65     PRESHADER_OP_DOTSWIZ8,
66 };
67 
/* Signature shared by the pres_* evaluators below: args points at the operand values,
 * n is a count that, among the evaluators in this file, only pres_dot() reads. */
68 typedef double (*pres_op_func)(double *args, int n);
69 
70 static double to_signed_nan(double v)
71 {
72     static const union
73     {
74         ULONG64 ulong64_value;
75         double double_value;
76     }
77     signed_nan =
78     {
79         0xfff8000000000000
80     };
81 
82     return isnan(v) ? signed_nan.double_value : v;
83 }
84 
/* mov: returns the first operand unchanged; n is ignored. */
static double pres_mov(double *args, int n)
{
    const double value = args[0];

    return value;
}

/* add: returns args[0] + args[1]; n is ignored. */
static double pres_add(double *args, int n)
{
    const double lhs = args[0];
    const double rhs = args[1];

    return lhs + rhs;
}

/* mul: returns args[0] * args[1]; n is ignored. */
static double pres_mul(double *args, int n)
{
    const double lhs = args[0];
    const double rhs = args[1];

    return lhs * rhs;
}
/* dot: n-component dot product. The first vector is args[0..n-1], the second one is
 * args[n..2n-1]. Products are accumulated in index order starting from 0.0. */
static double pres_dot(double *args, int n)
{
    double acc = 0.0;
    int k = 0;

    while (k < n)
    {
        acc += args[k] * args[k + n];
        ++k;
    }
    return acc;
}
98 
/* d3ds_dotswiz with 6 inputs: 3-component dot product of args[0..2] and args[3..5].
 * The n argument is ignored. */
static double pres_dotswiz6(double *args, int n)
{
    const int components = 3;

    return pres_dot(args, components);
}

/* d3ds_dotswiz with 8 inputs: 4-component dot product of args[0..3] and args[4..7].
 * The n argument is ignored. */
static double pres_dotswiz8(double *args, int n)
{
    const int components = 4;

    return pres_dot(args, components);
}
108 
/* neg: returns -args[0]. */
static double pres_neg(double *args, int n)
{
    const double value = args[0];

    return -value;
}

/* rcp: returns 1.0 / args[0]; no special handling of a zero operand. */
static double pres_rcp(double *args, int n)
{
    const double value = args[0];

    return 1.0 / value;
}

/* lt: returns 1.0 when args[0] < args[1], otherwise 0.0. */
static double pres_lt(double *args, int n)
{
    if (args[0] < args[1])
        return 1.0;
    return 0.0;
}

/* ge: returns 1.0 when args[0] >= args[1], otherwise 0.0. */
static double pres_ge(double *args, int n)
{
    if (args[0] >= args[1])
        return 1.0;
    return 0.0;
}
113 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
114 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
115 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
116 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];}
117 static double pres_sin(double *args, int n) {return sin(args[0]);}
118 static double pres_cos(double *args, int n) {return cos(args[0]);}
119 static double pres_rsq(double *args, int n)
120 {
121     double v;
122 
123     v = fabs(args[0]);
124     if (v == 0.0)
125         return INFINITY;
126     else
127         return 1.0 / sqrt(v);
128 }
129 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
130 static double pres_log(double *args, int n)
131 {
132     double v;
133 
134     v = fabs(args[0]);
135     if (v == 0.0)
136         return 0.0;
137     else
138 #ifdef HAVE_LOG2
139         return log2(v);
140 #else
141         return log(v) / log(2);
142 #endif
143 }
/* asin: arc sine of args[0]; a NaN result is canonicalized through to_signed_nan(). */
static double pres_asin(double *args, int n)
{
    const double result = asin(args[0]);

    return to_signed_nan(result);
}

/* acos: arc cosine of args[0]; a NaN result is canonicalized through to_signed_nan(). */
static double pres_acos(double *args, int n)
{
    const double result = acos(args[0]);

    return to_signed_nan(result);
}
146 static double pres_atan(double *args, int n) {return atan(args[0]);}
147 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);}
148 
/* div: deliberately returns 0.0 for any operands. According to the test results the
 * 'div' operation always returns 0. The compiler does not seem to ever generate it,
 * using rcp + mul instead, so it is probably not implemented in native d3dx. */
static double pres_div(double *args, int n)
{
    return 0.0;
}
152 
/* Layout of the first word of a preshader instruction, as decoded by parse_pres_ins():
 * the opcode is (word & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT, PRES_SCALAR_FLAG marks a
 * scalar operation, and PRES_NCOMP_MASK selects the component count. */
153 #define PRES_OPCODE_MASK 0x7ff00000
154 #define PRES_OPCODE_SHIFT 20
155 #define PRES_SCALAR_FLAG 0x80000000
156 #define PRES_NCOMP_MASK  0x0000ffff
157
/* Four-character codes; read as little-endian bytes they spell "PRES", "CLIT", "FXLC"
 * and "PRSI". find_bytecode_comment() takes such a code as its fourcc argument. */
158 #define FOURCC_PRES 0x53455250
159 #define FOURCC_CLIT 0x54494c43
160 #define FOURCC_FXLC 0x434c5846
161 #define FOURCC_PRSI 0x49535250
/* NOTE(review): the upper 16 bits are 0x4658 (ASCII 'F', 'X'); the use of this value is
 * not visible in this part of the file. */
162 #define PRES_SIGN 0x46580000
163 
/* Description of one preshader operation (one row of pres_op_info[]). */
164 struct op_info
165 {
        /* Opcode value compared against the opcode field of the instruction word. */
166     unsigned int opcode;
        /* Mnemonic, used in diagnostic messages. */
167     char mnem[16];
        /* Number of inputs; together with opcode it identifies the row in parse_pres_ins(). */
168     unsigned int input_count;
        /* When set, parse_pres_ins() treats the output as a single component for its
         * "outputs multiple registers" check (only "dot" sets it). Presumably the function
         * then receives all components at once - TODO confirm against the executor. */
169     BOOL func_all_comps;
        /* Evaluator for the operation; NULL for "nop". */
170     pres_op_func func;
171 };
172 
/* Operation table, one row per enum pres_ops value and in the same order.
 * Columns: opcode, mnemonic, input count, func_all_comps, evaluator.
 * The two "d3ds_dotswiz" rows share opcode 0x70e and are told apart by input count
 * (parse_pres_ins() matches on both opcode and input count). */
173 static const struct op_info pres_op_info[] =
174 {
175     {0x000, "nop", 0, 0, NULL    }, /* PRESHADER_OP_NOP */
176     {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
177     {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
178     {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
179     {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
180     {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
181     {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */
182     {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
183     {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
184     {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
185     {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */
186     {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */
187     {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */
188     {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
189     {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
190     {0x202, "lt",  2, 0, pres_lt }, /* PRESHADER_OP_LT  */
191     {0x203, "ge",  2, 0, pres_ge }, /* PRESHADER_OP_GE  */
192     {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
193     {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
194     {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */
195     {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */
196     {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
197     {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
198     {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
199     {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
200 };
201 
/* Storage type of the components held in a register table (see table_info[]).
 * PRES_VT_COUNT is also returned by table_type_from_param_type() for unsupported types. */
202 enum pres_value_type
203 {
204     PRES_VT_FLOAT,
205     PRES_VT_DOUBLE,
206     PRES_VT_INT,
207     PRES_VT_BOOL,
208     PRES_VT_COUNT
209 };
210 
/* Per register table: size in bytes of one component and the type it is stored as.
 * Rows are annotated with the PRES_REGTAB_* value they describe; the table is indexed
 * by table number throughout the regstore_* helpers. */
211 static const struct
212 {
213     unsigned int component_size;
214     enum pres_value_type type;
215 }
216 table_info[] =
217 {
218     {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
219     {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
220     {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
221     {sizeof(BOOL),   PRES_VT_BOOL  }, /* PRES_REGTAB_OBCONST */
222     {sizeof(int),    PRES_VT_INT,  }, /* PRES_REGTAB_OICONST */
223     /* TODO: use double precision for 64 bit */
224     {sizeof(float),  PRES_VT_FLOAT }  /* PRES_REGTAB_TEMP */
225 };
226 
/* Short printable names for register tables. NOTE(review): presumably indexed by
 * enum pres_reg_tables in the same order as table_info[], with "(null)" as the entry
 * for PRES_REGTAB_COUNT - confirm against the users of this array. */
227 static const char *table_symbol[] =
228 {
229     "imm", "c", "oc", "ob", "oi", "r", "(null)",
230 };
231 
/* Register set -> register table mapping; rows are annotated with the D3DXRS_* set
 * they correspond to. Float constants map to PRES_REGTAB_CONST here; samplers have no
 * table (PRES_REGTAB_COUNT). NOTE(review): by its name this is the mapping used for
 * preshader inputs - confirm at the place where regset2table is assigned. */
232 static const enum pres_reg_tables pres_regset2table[] =
233 {
234     PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
235     PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
236     PRES_REGTAB_CONST,    /* D3DXRS_FLOAT4 */
237     PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
238 };
239 
/* Register set -> register table mapping; rows are annotated with the D3DXRS_* set
 * they correspond to. Differs from pres_regset2table[] only for float constants, which
 * map to PRES_REGTAB_OCONST here; samplers have no table (PRES_REGTAB_COUNT).
 * NOTE(review): by its name this is the mapping used for shader constants - confirm at
 * the place where regset2table is assigned. */
240 static const enum pres_reg_tables shad_regset2table[] =
241 {
242     PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
243     PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
244     PRES_REGTAB_OCONST,   /* D3DXRS_FLOAT4 */
245     PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
246 };
247 
/* A register reference: the table it lives in plus a component offset into that table. */
248 struct d3dx_pres_reg
249 {
250     enum pres_reg_tables table;
251     /* offset is component index, not register index, e. g.
252        offset for component c3.y is 13 (3 * 4 + 1) */
253     unsigned int offset;
254 };
255 
/* An instruction operand as filled in by parse_pres_arg(). */
256 struct d3dx_pres_operand
257 {
        /* The register being addressed. */
258     struct d3dx_pres_reg reg;
        /* Index register for relative addressing; index_reg.table is set to
         * PRES_REGTAB_COUNT when the operand is not relatively addressed. */
259     struct d3dx_pres_reg index_reg;
260 };
261 
/* Capacity of d3dx_pres_ins.inputs; the largest input count in pres_op_info[] is 8. */
262 #define MAX_INPUTS_COUNT 8
263
/* One decoded preshader instruction, as produced by parse_pres_ins(). */
264 struct d3dx_pres_ins
265 {
        /* Index of the matching row in pres_op_info[]. */
266     enum pres_ops op;
267     /* first input argument is scalar,
268        scalar component is propagated */
269     BOOL scalar_op;
        /* Component count from the instruction word; parse_pres_ins() accepts 1..4 only. */
270     unsigned int component_count;
271     struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
272     struct d3dx_pres_operand output;
273 };
274 
/* Layout description computed by get_const_upload_info() for one constant/parameter pair. */
275 struct const_upload_info
276 {
        /* Set when constant and parameter have opposite matrix classes (rows vs. columns). */
277     BOOL transpose;
        /* Parameter dimensions: (columns, rows) for a column-major constant class,
         * (rows, columns) otherwise. */
278     unsigned int major, minor;
        /* Components between consecutive major entries: equal to minor when the table has one
         * component per register, otherwise the number of components per register. */
279     unsigned int major_stride;
        /* Number of complete major entries covered by the constant's registers. */
280     unsigned int major_count;
        /* Total component count: major_count * minor + minor_remainder. */
281     unsigned int count;
        /* Components left over after the last complete major entry (0 for tables with more
         * than one component per register). */
282     unsigned int minor_remainder;
283 };
284 
285 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type)
286 {
287     switch (type)
288     {
289         case D3DXPT_FLOAT:
290             return PRES_VT_FLOAT;
291         case D3DXPT_INT:
292             return PRES_VT_INT;
293         case D3DXPT_BOOL:
294             return PRES_VT_BOOL;
295         default:
296             FIXME("Unsupported type %u.\n", type);
297             return PRES_VT_COUNT;
298     }
299 }
300 
301 static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
302 {
303     return table == PRES_REGTAB_OBCONST ? offset : offset >> 2;
304 }
305 
306 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx)
307 {
308     return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2;
309 }
310 
/* Number of components in one register of the given table, i.e. the component
 * offset of register 1. */
static unsigned int get_reg_components(unsigned int table)
{
    const unsigned int second_register = 1;

    return get_offset_reg(table, second_register);
}
315 
/* sizeof(unsigned int) * 8, i.e. the number of bits in one unsigned int assuming 8-bit
 * bytes. Not referenced in this part of the file. */
316 #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
317 
318 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
319 {
320     unsigned int size;
321 
322     size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size;
323     if (size)
324     {
325         rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
326         if (!rs->tables[table])
327             return E_OUTOFMEMORY;
328     }
329     return D3D_OK;
330 }
331 
332 static void regstore_free_tables(struct d3dx_regstore *rs)
333 {
334     unsigned int i;
335 
336     for (i = 0; i < PRES_REGTAB_COUNT; ++i)
337     {
338         HeapFree(GetProcessHeap(), 0, rs->tables[i]);
339     }
340 }
341 
342 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data,
343         unsigned int start_offset, unsigned int count)
344 {
345     BYTE *dst = rs->tables[table];
346     const BYTE *src = data;
347     unsigned int size;
348 
349     dst += start_offset * table_info[table].component_size;
350     size = count * table_info[table].component_size;
351     assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst));
352     memcpy(dst, src, size);
353 }
354 
355 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
356 {
357     BYTE *p;
358 
359     p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
360     switch (table_info[table].type)
361     {
362         case PRES_VT_FLOAT:
363             return *(float *)p;
364         case PRES_VT_DOUBLE:
365             return *(double *)p;
366         default:
367             FIXME("Unexpected preshader input from table %u.\n", table);
368             return NAN;
369     }
370 }
371 
372 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
373 {
374     BYTE *p;
375 
376     p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
377     switch (table_info[table].type)
378     {
379         case PRES_VT_FLOAT : *(float *)p = v; break;
380         case PRES_VT_DOUBLE: *(double *)p = v; break;
381         case PRES_VT_INT   : *(int *)p = lrint(v); break;
382         case PRES_VT_BOOL  : *(BOOL *)p = !!v; break;
383         default:
384             FIXME("Bad type %u.\n", table_info[table].type);
385             break;
386     }
387 }
388 
/* Trace a buffer as 32-bit hexadecimal words, at most eight per traced line.
 * size is given in bytes; a trailing partial word is not printed. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *words = (unsigned int *)data;
    const unsigned int word_count = size / sizeof(*words);
    unsigned int row_start, row_len, k;

    for (row_start = 0; row_start < word_count; row_start += row_len)
    {
        row_len = word_count - row_start;
        if (row_len > 8)
            row_len = 8;

        for (k = 0; k < row_len; ++k)
            TRACE("0x%08x,", words[row_start + k]);
        TRACE("\n");
    }
}
405 
/* Walk consecutive comment sections (tokens whose low 16 bits are 0xfffe) and look for
 * the one whose first word equals fourcc.
 *
 * ptr/count: words to scan. On success *size receives the section size taken from the
 * upper 16 bits of the token (this count includes the fourcc word) and the return value
 * points at the word following the fourcc. Returns NULL when no matching section is
 * found, when a section is empty, or when a section would run past count. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    /* Provide at least one value in comment section on non-NULL return. */
    for (;;)
    {
        unsigned int token, words;

        if (count <= 2)
            break;
        token = *ptr;
        if ((token & 0xffff) != 0xfffe)
            break;

        words = token >> 16;
        if (!words || words + 1 > count)
            break;

        if (ptr[1] == fourcc)
        {
            *size = words;
            return ptr + 2;
        }

        /* Skip the token itself plus the section payload. */
        count -= words + 1;
        ptr += words + 1;
    }
    return NULL;
}
427 
428 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg)
429 {
430     static const enum pres_reg_tables reg_table[8] =
431     {
432         PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
433         PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
434     };
435 
436     if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
437     {
438         FIXME("Unsupported register table %#x.\n", *ptr);
439         return NULL;
440     }
441 
442     reg->table = reg_table[*ptr++];
443     reg->offset = *ptr++;
444     return ptr;
445 }
446 
447 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
448 {
449     if (count < 3 || (*ptr && count < 5))
450     {
451         WARN("Byte code buffer ends unexpectedly, count %u.\n", count);
452         return NULL;
453     }
454 
455     if (*ptr)
456     {
457         if (*ptr != 1)
458         {
459             FIXME("Unknown relative addressing flag, word %#x.\n", *ptr);
460             return NULL;
461         }
462         ptr = parse_pres_reg(ptr + 1, &opr->index_reg);
463         if (!ptr)
464             return NULL;
465     }
466     else
467     {
468         opr->index_reg.table = PRES_REGTAB_COUNT;
469         ++ptr;
470     }
471 
472     ptr = parse_pres_reg(ptr, &opr->reg);
473 
474     if (opr->reg.table == PRES_REGTAB_OBCONST)
475         opr->reg.offset /= 4;
476     return ptr;
477 }
478 
479 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
480 {
481     unsigned int ins_code, ins_raw;
482     unsigned int input_count;
483     unsigned int i;
484 
485     if (count < 2)
486     {
487         WARN("Byte code buffer ends unexpectedly.\n");
488         return NULL;
489     }
490 
491     ins_raw = *ptr++;
492     ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
493     ins->component_count = ins_raw & PRES_NCOMP_MASK;
494     ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);
495 
496     if (ins->component_count < 1 || ins->component_count > 4)
497     {
498         FIXME("Unsupported number of components %u.\n", ins->component_count);
499         return NULL;
500     }
501     input_count = *ptr++;
502     count -= 2;
503     for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
504         if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
505             break;
506     if (i == ARRAY_SIZE(pres_op_info))
507     {
508         FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
509         return NULL;
510     }
511     ins->op = i;
512     if (input_count > ARRAY_SIZE(ins->inputs))
513     {
514         FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
515                 pres_op_info[i].mnem);
516         return NULL;
517     }
518     for (i = 0; i < input_count; ++i)
519     {
520         unsigned int *p;
521 
522         p = parse_pres_arg(ptr, count, &ins->inputs[i]);
523         if (!p)
524             return NULL;
525         count -= p - ptr;
526         ptr = p;
527     }
528     ptr = parse_pres_arg(ptr, count, &ins->output);
529     if (ins->output.index_reg.table != PRES_REGTAB_COUNT)
530     {
531         FIXME("Relative addressing in output register not supported.\n");
532         return NULL;
533     }
534     if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset
535             + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1))
536             != get_reg_offset(ins->output.reg.table, ins->output.reg.offset))
537     {
538         FIXME("Instructions outputting multiple registers are not supported.\n");
539         return NULL;
540     }
541     return ptr;
542 }
543 
544 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc,
545         WORD *constantinfo_reserved)
546 {
547     const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc);
548 
549     if (!constant)
550     {
551         FIXME("Could not get constant desc.\n");
552         if (constantinfo_reserved)
553             *constantinfo_reserved = 0;
554         return D3DERR_INVALIDCALL;
555     }
556     *desc = constant->desc;
557     if (constantinfo_reserved)
558         *constantinfo_reserved = constant->constantinfo_reserved;
559     return D3D_OK;
560 }
561 
562 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set,
563         struct const_upload_info *info)
564 {
565     struct d3dx_parameter *param = const_set->param;
566     unsigned int table = const_set->table;
567 
568     info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
569             || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
570     if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
571     {
572         info->major = param->columns;
573         info->minor = param->rows;
574     }
575     else
576     {
577         info->major = param->rows;
578         info->minor = param->columns;
579     }
580 
581     if (get_reg_components(table) == 1)
582     {
583         unsigned int const_length = get_offset_reg(table, const_set->register_count);
584 
585         info->major_stride = info->minor;
586         info->major_count = const_length / info->major_stride;
587         info->minor_remainder = const_length % info->major_stride;
588     }
589     else
590     {
591         info->major_stride = get_reg_components(table);
592         info->major_count = const_set->register_count;
593         info->minor_remainder = 0;
594     }
595     info->count = info->major_count * info->minor + info->minor_remainder;
596 }
597 
598 #define INITIAL_CONST_SET_SIZE 16
599 
600 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
601 {
602     if (const_tab->const_set_count >= const_tab->const_set_size)
603     {
604         unsigned int new_size;
605         struct d3dx_const_param_eval_output *new_alloc;
606 
607         if (!const_tab->const_set_size)
608         {
609             new_size = INITIAL_CONST_SET_SIZE;
610             new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size);
611             if (!new_alloc)
612             {
613                 ERR("Out of memory.\n");
614                 return E_OUTOFMEMORY;
615             }
616         }
617         else
618         {
619             new_size = const_tab->const_set_size * 2;
620             new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
621                     sizeof(*const_tab->const_set) * new_size);
622             if (!new_alloc)
623             {
624                 ERR("Out of memory.\n");
625                 return E_OUTOFMEMORY;
626             }
627         }
628         const_tab->const_set = new_alloc;
629         const_tab->const_set_size = new_size;
630     }
631     const_tab->const_set[const_tab->const_set_count++] = *set;
632     return D3D_OK;
633 }
634 
635 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab,
636         struct d3dx_preshader *pres)
637 {
638     unsigned int i;
639     struct d3dx_const_param_eval_output const_set = {NULL};
640 
641     for (i = 0; i < pres->ins_count; ++i)
642     {
643         const struct d3dx_pres_ins *ins = &pres->ins[i];
644         const struct d3dx_pres_reg *reg = &ins->output.reg;
645 
646         if (reg->table == PRES_REGTAB_TEMP)
647             continue;
648 
649         const_set.register_index = get_reg_offset(reg->table, reg->offset);
650         const_set.register_count = 1;
651         const_set.table = reg->table;
652         const_set.constant_class = D3DXPC_FORCE_DWORD;
653         const_set.element_count = 1;
654         append_const_set(const_tab, &const_set);
655     }
656 }
657 
658 static int compare_const_set(const void *a, const void *b)
659 {
660     const struct d3dx_const_param_eval_output *r1 = a;
661     const struct d3dx_const_param_eval_output *r2 = b;
662 
663     if (r1->table != r2->table)
664         return r1->table - r2->table;
665     return r1->register_index - r2->register_index;
666 }
667 
/* Merge runs of adjacent const set entries, starting at position index, that describe
 * contiguous registers fed from contiguous parameter data.
 *
 * const_tab: table whose const_set array is compacted in place.
 * param:     parent parameter of the entries; used for tracing and, for a direct-copy
 *            run that starts at the initial index, as the parameter of the merged entry
 *            (unless param is a struct of type void).
 * index:     first entry to consider.
 *
 * Returns D3D_OK, or D3DERR_INVALIDCALL when param would become the merged entry's
 * parameter but has an unsupported type. */
668 static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab,
669         struct d3dx_parameter *param, unsigned int index)
670 {
671     unsigned int i, start_index = index;
672     DWORD *current_data;
673     enum pres_reg_tables current_table;
674     unsigned int current_start_offset, element_count;
675     struct d3dx_const_param_eval_output *first_const;
676
        /* Guard for the unsigned "const_set_count - 1" in the loop condition below. */
677     if (!const_tab->const_set_count)
678         return D3D_OK;
679
680     while (index < const_tab->const_set_count - 1)
681     {
            /* Start a new run at index; current_data / current_start_offset track where the
             * next entry of the run has to begin (parameter data and component offset). */
682         first_const = &const_tab->const_set[index];
683         current_data = first_const->param->data;
684         current_table = first_const->table;
685         current_start_offset = get_offset_reg(current_table, first_const->register_index);
686         element_count = 0;
687         for (i = index; i < const_tab->const_set_count; ++i)
688         {
689             struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i];
690             unsigned int count = get_offset_reg(const_set->table,
691                     const_set->register_count * const_set->element_count);
692             unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index);
693
                /* An entry continues the run when it is in the same table, starts exactly at
                 * the expected component offset and data pointer, has the same direct_copy
                 * flag and - unless it is a direct copy - has the same type, class, dimensions
                 * and register count as the first entry. The parameter element counts must
                 * match as well, except for the very last entry of the table. */
694             if (!(const_set->table == current_table && current_start_offset == start_offset
695                     && const_set->direct_copy == first_const->direct_copy
696                     && current_data == const_set->param->data
697                     && (const_set->direct_copy || (first_const->param->type == const_set->param->type
698                     && first_const->param->class == const_set->param->class
699                     && first_const->param->columns == const_set->param->columns
700                     && first_const->param->rows == const_set->param->rows
701                     && first_const->register_count == const_set->register_count
702                     && (i == const_tab->const_set_count - 1
703                     || first_const->param->element_count == const_set->param->element_count)))))
704                 break;
705
                /* Advance the expected position past this entry. current_data is a DWORD
                 * pointer, so the data step is counted in DWORDs. */
706             current_start_offset += count;
707             current_data += const_set->direct_copy ? count : const_set->param->rows
708                     * const_set->param->columns * const_set->element_count;
709             element_count += const_set->element_count;
710         }
711
            /* Entry index always matches itself, so i >= index + 1 here; a merge happens only
             * when at least one further entry joined the run. */
712         if (i > index + 1)
713         {
714             TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index,
715                     debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy);
716
717             first_const->element_count = element_count;
718             if (first_const->direct_copy)
719             {
                    /* A merged direct copy becomes a single element covering the whole
                     * register range of the run. */
720                 first_const->element_count = 1;
721                 if (index == start_index
722                         && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT))
723                 {
724                     if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
725                         return D3DERR_INVALIDCALL;
726                     first_const->param = param;
727                 }
728                 first_const->register_count = get_reg_offset(current_table, current_start_offset)
729                         - first_const->register_index;
730             }
                /* Drop the merged entries index + 1 .. i - 1 by moving the tail down. */
731             memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i],
732                     sizeof(*const_tab->const_set) * (const_tab->const_set_count - i));
733             const_tab->const_set_count -= i - index - 1;
734         }
735         else
736         {
737             TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n",
738                     const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy);
739         }
            /* NOTE(review): after a merge the tail has been moved down to index + 1, yet index
             * is advanced to the pre-move position i, so the entries now located between
             * index + 1 and i are not examined for further merges - confirm this is intended. */
740         index = i;
741     }
742     return D3D_OK;
743 }
744 
/* Build const set entries for constant hc and the effect parameter param it is bound to.
 *
 * Arrays and structs are handled recursively: every element / struct member is processed
 * on its own and the entries produced for them are then passed to
 * merge_const_set_entries(). A leaf constant yields one entry appended to const_tab.
 *
 * Returns D3D_OK on success (also when the constant has nothing to upload),
 * D3DERR_INVALIDCALL when constant and parameter do not fit together, or the error
 * returned by append_const_set(). */
745 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
746         D3DXHANDLE hc, struct d3dx_parameter *param)
747 {
748     D3DXCONSTANT_DESC desc;
749     unsigned int const_count, param_count, i;
750     BOOL get_element;
751     struct d3dx_const_param_eval_output const_set;
752     struct const_upload_info info;
753     enum pres_value_type table_type;
754     HRESULT hr;
755
756     if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL)))
757         return D3DERR_INVALIDCALL;
758
        /* Decide whether the children are array elements or struct members and check that
         * constant and parameter agree on how many there are. */
759     if (param->element_count)
760     {
761         param_count = param->element_count;
762         const_count = desc.Elements;
763         get_element = TRUE;
764     }
765     else
766     {
767         if (desc.Elements > 1)
768         {
769             FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
770             return D3DERR_INVALIDCALL;
771         }
772         param_count = param->member_count;
773         const_count = desc.StructMembers;
774         get_element = FALSE;
775     }
776     if (const_count != param_count)
777     {
778         FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
779                 param_count, const_count);
780         return D3DERR_INVALIDCALL;
781     }
782     if (const_count)
783     {
784         HRESULT ret = D3D_OK;
785         D3DXHANDLE hc_element;
            /* Entries appended from here on belong to this parameter's children. */
786         unsigned int index = const_tab->const_set_count;
787
            /* All children are processed even after a failure; the last failure is returned.
             * param->members[] is indexed for array elements and struct members alike. */
788         for (i = 0; i < const_count; ++i)
789         {
790             if (get_element)
791                 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
792             else
793                 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
794             if (!hc_element)
795             {
796                 FIXME("Could not get constant.\n");
797                 hr = D3DERR_INVALIDCALL;
798             }
799             else
800             {
801                 hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
802             }
803             if (FAILED(hr))
804                 ret = hr;
805         }
806         if (FAILED(ret))
807             return ret;
808         return merge_const_set_entries(const_tab, param, index);
809     }
810
        /* Leaf constant: describe it with a single const set entry. */
811     TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
812             debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
813     TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
814             debugstr_a(param->name), param->rows, param->columns, param->class,
815             param->flags, param->bytes);
816
817     const_set.element_count = 1;
818     const_set.param = param;
819     const_set.constant_class = desc.Class;
        /* NOTE(review): the bound is taken from shad_regset2table[] while the lookup below
         * goes through const_tab->regset2table; both mapping arrays in this file have the
         * same number of entries. */
820     if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
821     {
822         FIXME("Unknown register set %u.\n", desc.RegisterSet);
823         return D3DERR_INVALIDCALL;
824     }
825     const_set.register_index = desc.RegisterIndex;
826     const_set.table = const_tab->regset2table[desc.RegisterSet];
        /* Register sets without a table (samplers) map to PRES_REGTAB_COUNT. */
827     if (const_set.table >= PRES_REGTAB_COUNT)
828     {
829         ERR("Unexpected register set %u.\n", desc.RegisterSet);
830         return D3DERR_INVALIDCALL;
831     }
        /* Both the table components and the parameter components are expected to be
         * 32 bits wide. NOTE(review): the second assert divides by rows * columns. */
832     assert(table_info[const_set.table].component_size == sizeof(unsigned int));
833     assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int));
834     const_set.register_count = desc.RegisterCount;
835     table_type = table_info[const_set.table].type;
836     get_const_upload_info(&const_set, &info);
837     if (!info.count)
838     {
839         TRACE("%s has zero count, skipping.\n", debugstr_a(param->name));
840         return D3D_OK;
841     }
842
843     if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
844         return D3DERR_INVALIDCALL;
845
        /* A direct copy is possible when no type conversion and no transposition is needed,
         * the parameter rows are laid out exactly like the registers, the constant covers
         * whole registers only and the parameter holds at least that much data. */
846     const_set.direct_copy = table_type_from_param_type(param->type) == table_type
847             && !info.transpose && info.minor == info.major_stride
848             && info.count == get_offset_reg(const_set.table, const_set.register_count)
849             && info.count * sizeof(unsigned int) <= param->bytes;
850     if (info.minor_remainder && !const_set.direct_copy && !info.transpose)
851         FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n",
852                 debugstr_a(param->name));
853
        /* The constant must not cover more major entries than the parameter has. */
854     if (info.major_count > info.major
855             || (info.major_count == info.major && info.minor_remainder))
856     {
857         WARN("Constant dimensions exceed parameter size.\n");
858         return D3DERR_INVALIDCALL;
859     }
860
861     if (FAILED(hr = append_const_set(const_tab, &const_set)))
862         return hr;
863
864     return D3D_OK;
865 }
866 
867 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out,
868         struct d3dx9_base_effect *base, const char **skip_constants,
869         unsigned int skip_constants_count, struct d3dx_preshader *pres)
870 {
871     ID3DXConstantTable *ctab;
872     D3DXCONSTANT_DESC *cdesc;
873     struct d3dx_parameter **inputs_param;
874     D3DXCONSTANTTABLE_DESC desc;
875     HRESULT hr;
876     D3DXHANDLE hc;
877     unsigned int i, j;
878 
879     hr = D3DXGetShaderConstantTable(byte_code, &ctab);
880     if (FAILED(hr) || !ctab)
881     {
882         TRACE("Could not get CTAB data, hr %#x.\n", hr);
883         /* returning OK, shaders and preshaders without CTAB are valid */
884         return D3D_OK;
885     }
886     if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
887     {
888         FIXME("Could not get CTAB desc, hr %#x.\n", hr);
889         goto cleanup;
890     }
891 
892     out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
893     out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
894     if (!cdesc || !inputs_param)
895     {
896         hr = E_OUTOFMEMORY;
897         goto cleanup;
898     }
899 
900     for (i = 0; i < desc.Constants; ++i)
901     {
902         unsigned int index = out->input_count;
903         WORD constantinfo_reserved;
904 
905         hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
906         if (!hc)
907         {
908             FIXME("Null constant handle.\n");
909             goto cleanup;
910         }
911         if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved)))
912             goto cleanup;
913         inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name);
914         if (!inputs_param[index])
915         {
916             WARN("Could not find parameter %s in effect.\n", cdesc[index].Name);
917             continue;
918         }
919         if (cdesc[index].Class == D3DXPC_OBJECT)
920         {
921             TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]);
922             if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT
923                     || !is_param_type_sampler(inputs_param[index]->type))
924             {
925                 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name));
926                 hr = D3DERR_INVALIDCALL;
927                 goto cleanup;
928             }
929             if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount)
930             {
931                 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name));
932                 hr = D3DERR_INVALIDCALL;
933                 goto cleanup;
934             }
935         }
936         if (!is_top_level_parameter(inputs_param[index]))
937         {
938             WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name));
939             hr = E_FAIL;
940             goto cleanup;
941         }
942 
943         for (j = 0; j < skip_constants_count; ++j)
944         {
945             if (!strcmp(cdesc[index].Name, skip_constants[j]))
946             {
947                 if (!constantinfo_reserved)
948                 {
949                     WARN("skip_constants parameter %s is not register bound.\n",
950                             cdesc[index].Name);
951                     hr = D3DERR_INVALIDCALL;
952                     goto cleanup;
953                 }
954                 TRACE("Skipping constant %s.\n", cdesc[index].Name);
955                 break;
956             }
957         }
958         if (j < skip_constants_count)
959             continue;
960         ++out->input_count;
961         if (inputs_param[index]->class == D3DXPC_OBJECT)
962             continue;
963         if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index])))
964             goto cleanup;
965     }
966     if (pres)
967         append_pres_const_sets_for_shader_input(out, pres);
968     if (out->const_set_count)
969     {
970         struct d3dx_const_param_eval_output *new_alloc;
971 
972         qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set);
973 
974         i = 0;
975         while (i < out->const_set_count - 1)
976         {
977             if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD
978                     && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD
979                     && out->const_set[i].table == out->const_set[i + 1].table
980                     && out->const_set[i].register_index + out->const_set[i].register_count
981                     >= out->const_set[i + 1].register_index)
982             {
983                 assert(out->const_set[i].register_index + out->const_set[i].register_count
984                         <= out->const_set[i + 1].register_index + 1);
985                 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1
986                         - out->const_set[i].register_index;
987                 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i])
988                         * (out->const_set_count - i - 2));
989                 --out->const_set_count;
990             }
991             else
992             {
993                 ++i;
994             }
995         }
996 
997         new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set,
998                 sizeof(*out->const_set) * out->const_set_count);
999         if (new_alloc)
1000         {
1001             out->const_set = new_alloc;
1002             out->const_set_size = out->const_set_count;
1003         }
1004         else
1005         {
1006             WARN("Out of memory.\n");
1007         }
1008     }
1009 cleanup:
1010     ID3DXConstantTable_Release(ctab);
1011     return hr;
1012 }
1013 
1014 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
1015 {
1016     if (table < PRES_REGTAB_COUNT)
1017         table_sizes[table] = max(table_sizes[table], max_register + 1);
1018 }
1019 
1020 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
1021 {
1022     unsigned int i, table, max_register;
1023 
1024     for (i = 0; i < ctab->input_count; ++i)
1025     {
1026         if (!ctab->inputs[i].RegisterCount)
1027             continue;
1028         max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
1029         table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1030         update_table_size(table_sizes, table, max_register);
1031     }
1032 }
1033 
1034 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
1035 {
1036     static const char *xyzw_str = "xyzw";
1037     unsigned int i, table;
1038 
1039     table = arg->reg.table;
1040     if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT)
1041     {
1042         TRACE("(");
1043         for (i = 0; i < component_count; ++i)
1044             TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
1045                     ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]);
1046         TRACE(")");
1047     }
1048     else
1049     {
1050         if (arg->index_reg.table == PRES_REGTAB_COUNT)
1051         {
1052             TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset));
1053         }
1054         else
1055         {
1056             unsigned int index_reg;
1057 
1058             index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset);
1059             TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset),
1060                     table_symbol[arg->index_reg.table], index_reg,
1061                     xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]);
1062         }
1063         for (i = 0; i < component_count; ++i)
1064             TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]);
1065     }
1066 }
1067 
1068 static void dump_registers(struct d3dx_const_tab *ctab)
1069 {
1070     unsigned int table, i;
1071 
1072     for (i = 0; i < ctab->input_count; ++i)
1073     {
1074         table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1075         TRACE("//   %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
1076                 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
1077     }
1078 }
1079 
1080 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
1081 {
1082     unsigned int i;
1083 
1084     TRACE("%s ", pres_op_info[ins->op].mnem);
1085     dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
1086     for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
1087     {
1088         TRACE(", ");
1089         dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
1090     }
1091     TRACE("\n");
1092 }
1093 
1094 static void dump_preshader(struct d3dx_preshader *pres)
1095 {
1096     unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4;
1097     const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED];
1098 
1099     if (immediate_count)
1100         TRACE("// Immediates:\n");
1101     for (i = 0; i < immediate_count; ++i)
1102     {
1103         if (!(i % 4))
1104             TRACE("// ");
1105         TRACE("%.8e", immediates[i]);
1106         if (i % 4 == 3)
1107             TRACE("\n");
1108         else
1109             TRACE(", ");
1110     }
1111     TRACE("// Preshader registers:\n");
1112     dump_registers(&pres->inputs);
1113     TRACE("preshader\n");
1114     for (i = 0; i < pres->ins_count; ++i)
1115         dump_ins(&pres->regs, &pres->ins[i]);
1116 }
1117 
1118 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base)
1119 {
1120     unsigned int *p;
1121     unsigned int i, j, const_count;
1122     double *dconst;
1123     HRESULT hr;
1124     unsigned int saved_word;
1125     unsigned int section_size;
1126 
1127     TRACE("Preshader version %#x.\n", *ptr & 0xffff);
1128 
1129     if (!count)
1130     {
1131         WARN("Unexpected end of byte code buffer.\n");
1132         return D3DXERR_INVALIDDATA;
1133     }
1134 
1135     p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
1136     if (p)
1137     {
1138         const_count = *p++;
1139         if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
1140         {
1141             WARN("Byte code buffer ends unexpectedly.\n");
1142             return D3DXERR_INVALIDDATA;
1143         }
1144         dconst = (double *)p;
1145     }
1146     else
1147     {
1148         const_count = 0;
1149         dconst = NULL;
1150     }
1151     TRACE("%u double constants.\n", const_count);
1152 
1153     p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
1154     if (!p)
1155     {
1156         WARN("Could not find preshader code.\n");
1157         return D3D_OK;
1158     }
1159     pres->ins_count = *p++;
1160     --section_size;
1161     if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
1162     {
1163         WARN("Invalid instruction count %u.\n", pres->ins_count);
1164         return D3DXERR_INVALIDDATA;
1165     }
1166     TRACE("%u instructions.\n", pres->ins_count);
1167     pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
1168     if (!pres->ins)
1169         return E_OUTOFMEMORY;
1170     for (i = 0; i < pres->ins_count; ++i)
1171     {
1172         unsigned int *ptr_next;
1173 
1174         ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
1175         if (!ptr_next)
1176             return D3DXERR_INVALIDDATA;
1177         section_size -= ptr_next - p;
1178         p = ptr_next;
1179     }
1180 
1181     pres->inputs.regset2table = pres_regset2table;
1182 
1183     saved_word = *ptr;
1184     *ptr = 0xfffe0000;
1185     hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL);
1186     *ptr = saved_word;
1187     if (FAILED(hr))
1188         return hr;
1189 
1190     if (const_count % get_reg_components(PRES_REGTAB_IMMED))
1191     {
1192         FIXME("const_count %u is not a multiple of %u.\n", const_count,
1193                 get_reg_components(PRES_REGTAB_IMMED));
1194         return D3DXERR_INVALIDDATA;
1195     }
1196     pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count);
1197 
1198     update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
1199     for (i = 0; i < pres->ins_count; ++i)
1200     {
1201         for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
1202         {
1203             enum pres_reg_tables table;
1204             unsigned int reg_idx;
1205 
1206             if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT)
1207             {
1208                 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0
1209                         : pres->ins[i].component_count - 1;
1210 
1211                 table = pres->ins[i].inputs[j].reg.table;
1212                 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset
1213                         + last_component_index);
1214             }
1215             else
1216             {
1217                 table = pres->ins[i].inputs[j].index_reg.table;
1218                 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset);
1219             }
1220             if (reg_idx >= pres->regs.table_sizes[table])
1221             {
1222                 /* Native accepts these broken preshaders. */
1223                 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n",
1224                         i, j, table, reg_idx);
1225                 return D3DXERR_INVALIDDATA;
1226             }
1227         }
1228         update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table,
1229                 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset));
1230     }
1231     if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
1232         return E_OUTOFMEMORY;
1233     regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);
1234 
1235     return D3D_OK;
1236 }
1237 
1238 HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
1239         D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter,
1240         const char **skip_constants, unsigned int skip_constants_count)
1241 {
1242     struct d3dx_param_eval *peval;
1243     unsigned int *ptr, *shader_ptr = NULL;
1244     unsigned int i;
1245     BOOL shader;
1246     unsigned int count, pres_size;
1247     HRESULT ret;
1248 
1249     TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
1250             base_effect, byte_code, byte_code_size, type, peval_out);
1251 
1252     count = byte_code_size / sizeof(unsigned int);
1253     if (!byte_code || !count)
1254     {
1255         *peval_out = NULL;
1256         return D3D_OK;
1257     }
1258 
1259     peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
1260     if (!peval)
1261     {
1262         ret = E_OUTOFMEMORY;
1263         goto err_out;
1264     }
1265     peval->version_counter = version_counter;
1266 
1267     peval->param_type = type;
1268     switch (type)
1269     {
1270         case D3DXPT_VERTEXSHADER:
1271         case D3DXPT_PIXELSHADER:
1272             shader = TRUE;
1273             break;
1274         default:
1275             shader = FALSE;
1276             break;
1277     }
1278     peval->shader_inputs.regset2table = shad_regset2table;
1279 
1280     ptr = (unsigned int *)byte_code;
1281     if (shader)
1282     {
1283         if ((*ptr & 0xfffe0000) != 0xfffe0000)
1284         {
1285             FIXME("Invalid shader signature %#x.\n", *ptr);
1286             ret = D3DXERR_INVALIDDATA;
1287             goto err_out;
1288         }
1289         TRACE("Shader version %#x.\n", *ptr & 0xffff);
1290         shader_ptr = ptr;
1291         ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
1292         if (!ptr)
1293             TRACE("No preshader found.\n");
1294     }
1295     else
1296     {
1297         pres_size = count;
1298     }
1299 
1300     if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect)))
1301     {
1302         FIXME("Failed parsing preshader, byte code for analysis follows.\n");
1303         dump_bytecode(byte_code, byte_code_size);
1304         goto err_out;
1305     }
1306 
1307     if (shader)
1308     {
1309         if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect,
1310                 skip_constants, skip_constants_count, &peval->pres)))
1311         {
1312             TRACE("Could not get shader constant table, hr %#x.\n", ret);
1313             goto err_out;
1314         }
1315         update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
1316     }
1317 
1318     for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
1319     {
1320         if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i)))
1321             goto err_out;
1322     }
1323 
1324     if (TRACE_ON(d3dx))
1325     {
1326         dump_bytecode(byte_code, byte_code_size);
1327         dump_preshader(&peval->pres);
1328         if (shader)
1329         {
1330             TRACE("// Shader registers:\n");
1331             dump_registers(&peval->shader_inputs);
1332         }
1333     }
1334     *peval_out = peval;
1335     TRACE("Created parameter evaluator %p.\n", *peval_out);
1336     return D3D_OK;
1337 
1338 err_out:
1339     WARN("Error creating parameter evaluator.\n");
1340     if (TRACE_ON(d3dx))
1341         dump_bytecode(byte_code, byte_code_size);
1342 
1343     d3dx_free_param_eval(peval);
1344     *peval_out = NULL;
1345     return ret;
1346 }
1347 
1348 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
1349 {
1350     HeapFree(GetProcessHeap(), 0, ctab->inputs);
1351     HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
1352     HeapFree(GetProcessHeap(), 0, ctab->const_set);
1353 }
1354 
1355 static void d3dx_free_preshader(struct d3dx_preshader *pres)
1356 {
1357     HeapFree(GetProcessHeap(), 0, pres->ins);
1358 
1359     regstore_free_tables(&pres->regs);
1360     d3dx_free_const_tab(&pres->inputs);
1361 }
1362 
1363 void d3dx_free_param_eval(struct d3dx_param_eval *peval)
1364 {
1365     TRACE("peval %p.\n", peval);
1366 
1367     if (!peval)
1368         return;
1369 
1370     d3dx_free_preshader(&peval->pres);
1371     d3dx_free_const_tab(&peval->shader_inputs);
1372     HeapFree(GetProcessHeap(), 0, peval);
1373 }
1374 
/* Converts 'count' floats from 'in' to ints in 'out' using the C float to int
 * conversion. Elements are processed in order, one at a time. */
static void pres_int_from_float(void *out, const void *in, unsigned int count)
{
    const float *src = in;
    int *dst = out;

    while (count--)
        *dst++ = *src++;
}
1384 
1385 static void pres_bool_from_value(void *out, const void *in, unsigned int count)
1386 {
1387     unsigned int i;
1388     const DWORD *in_dword = in;
1389     BOOL *out_bool = out;
1390 
1391     for (i = 0; i < count; ++i)
1392         out_bool[i] = !!in_dword[i];
1393 }
1394 
/* Converts 'count' ints from 'in' to floats in 'out'. Elements are processed
 * in order, one at a time. */
static void pres_float_from_int(void *out, const void *in, unsigned int count)
{
    const int *src = in;
    float *dst = out;

    while (count--)
        *dst++ = *src++;
}
1404 
1405 static void pres_float_from_bool(void *out, const void *in, unsigned int count)
1406 {
1407     unsigned int i;
1408     const BOOL *in_bool = in;
1409     float *out_float = out;
1410 
1411     for (i = 0; i < count; ++i)
1412         out_float[i] = !!in_bool[i];
1413 }
1414 
/* Converts 'count' boolean values from 'in' to ints in 'out': every non-zero
 * input becomes 1, zero stays 0.
 *
 * The input is read as integers. Reading it as float, as was done before,
 * tests the value with floating point semantics, so the bit pattern 0x80000000
 * (negative zero) was treated as false although it is a non-zero boolean.
 * Plain int is used because BOOL is an int on Windows. */
static void pres_int_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const int *in_bool = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = !!in_bool[i];
}
1424 
1425 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table,
1426         unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type)
1427 {
1428     typedef void (*conv_func)(void *out, const void *in, unsigned int count);
1429     static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] =
1430     {
1431         {NULL,                 NULL, pres_int_from_float, pres_bool_from_value},
1432         {NULL,                 NULL, NULL,                NULL},
1433         {pres_float_from_int,  NULL, NULL,                pres_bool_from_value},
1434         {pres_float_from_bool, NULL, pres_int_from_bool,  NULL}
1435     };
1436     enum pres_value_type table_type = table_info[table].type;
1437 
1438     if (param_type == table_type)
1439     {
1440         regstore_set_values(rs, table, in, offset, count);
1441         return;
1442     }
1443 
1444     set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count);
1445 }
1446 
1447 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1448         D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr,
1449         unsigned int start, unsigned int count)
1450 {
1451     if (type == D3DXPT_VERTEXSHADER)
1452     {
1453         switch(table)
1454         {
1455             case PRES_REGTAB_OCONST:
1456                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count);
1457             case PRES_REGTAB_OICONST:
1458                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count);
1459             case PRES_REGTAB_OBCONST:
1460                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count);
1461             default:
1462                 FIXME("Unexpected register table %u.\n", table);
1463                 return D3DERR_INVALIDCALL;
1464         }
1465     }
1466     else if (type == D3DXPT_PIXELSHADER)
1467     {
1468         switch(table)
1469         {
1470             case PRES_REGTAB_OCONST:
1471                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count);
1472             case PRES_REGTAB_OICONST:
1473                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count);
1474             case PRES_REGTAB_OBCONST:
1475                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count);
1476             default:
1477                 FIXME("Unexpected register table %u.\n", table);
1478                 return D3DERR_INVALIDCALL;
1479         }
1480     }
1481     else
1482     {
1483         FIXME("Unexpected parameter type %u.\n", type);
1484         return D3DERR_INVALIDCALL;
1485     }
1486 }
1487 
1488 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
1489         ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1490         D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty)
1491 {
1492     unsigned int const_idx;
1493     unsigned int current_start = 0, current_count = 0;
1494     enum pres_reg_tables current_table = PRES_REGTAB_COUNT;
1495     BOOL update_device = manager || device;
1496     HRESULT hr, result = D3D_OK;
1497     ULONG64 update_version = const_tab->update_version;
1498 
1499     for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1500     {
1501         struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1502         enum pres_reg_tables table = const_set->table;
1503         struct d3dx_parameter *param = const_set->param;
1504         unsigned int element, i, j, start_offset;
1505         struct const_upload_info info;
1506         unsigned int *data;
1507         enum pres_value_type param_type;
1508 
1509         if (!(param && is_param_dirty(param, update_version)))
1510             continue;
1511 
1512         data = param->data;
1513         start_offset = get_offset_reg(table, const_set->register_index);
1514         if (const_set->direct_copy)
1515         {
1516             regstore_set_values(rs, table, data, start_offset,
1517                     get_offset_reg(table, const_set->register_count));
1518             continue;
1519         }
1520         param_type = table_type_from_param_type(param->type);
1521         if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR)
1522         {
1523             unsigned int count = max(param->rows, param->columns);
1524 
1525             if (count >= get_reg_components(table))
1526             {
1527                 regstore_set_data(rs, table, start_offset, data,
1528                         count * const_set->element_count, param_type);
1529             }
1530             else
1531             {
1532                 for (element = 0; element < const_set->element_count; ++element)
1533                     regstore_set_data(rs, table, start_offset + get_offset_reg(table, element),
1534                             &data[element * count], count, param_type);
1535             }
1536             continue;
1537         }
1538         get_const_upload_info(const_set, &info);
1539         for (element = 0; element < const_set->element_count; ++element)
1540         {
1541             unsigned int *out = (unsigned int *)rs->tables[table] + start_offset;
1542 
1543             /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer.
1544              * All the supported types of parameters and table values have the same size. */
1545             if (info.transpose)
1546             {
1547                 for (i = 0; i < info.major_count; ++i)
1548                     for (j = 0; j < info.minor; ++j)
1549                         out[i * info.major_stride + j] = data[i + j * info.major];
1550 
1551                 for (j = 0; j < info.minor_remainder; ++j)
1552                     out[i * info.major_stride + j] = data[i + j * info.major];
1553             }
1554             else
1555             {
1556                 for (i = 0; i < info.major_count; ++i)
1557                     for (j = 0; j < info.minor; ++j)
1558                         out[i * info.major_stride + j] = data[i * info.minor + j];
1559             }
1560             start_offset += get_offset_reg(table, const_set->register_count);
1561             data += param->rows * param->columns;
1562         }
1563         start_offset = get_offset_reg(table, const_set->register_index);
1564         if (table_info[table].type != param_type)
1565             regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset,
1566                     get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type);
1567     }
1568     const_tab->update_version = new_update_version;
1569     if (!update_device)
1570         return D3D_OK;
1571 
1572     for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1573     {
1574         struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1575 
1576         if (device_update_all || (const_set->param
1577                 ? is_param_dirty(const_set->param, update_version) : pres_dirty))
1578         {
1579             enum pres_reg_tables table = const_set->table;
1580 
1581             if (table == current_table && current_start + current_count == const_set->register_index)
1582             {
1583                 current_count += const_set->register_count * const_set->element_count;
1584             }
1585             else
1586             {
1587                 if (current_count)
1588                 {
1589                     if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1590                             (DWORD *)rs->tables[current_table]
1591                             + get_offset_reg(current_table, current_start), current_start, current_count)))
1592                         result = hr;
1593                 }
1594                 current_table = table;
1595                 current_start = const_set->register_index;
1596                 current_count = const_set->register_count * const_set->element_count;
1597             }
1598         }
1599     }
1600     if (current_count)
1601     {
1602         if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1603                 (DWORD *)rs->tables[current_table]
1604                 + get_offset_reg(current_table, current_start), current_start, current_count)))
1605             result = hr;
1606     }
1607     return result;
1608 }
1609 
1610 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset)
1611 {
1612     return regstore_get_double(rs, table, offset);
1613 }
1614 
1615 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp)
1616 {
1617     unsigned int offset, base_index, reg_index, table;
1618 
1619     table = opr->reg.table;
1620 
1621     if (opr->index_reg.table == PRES_REGTAB_COUNT)
1622         base_index = 0;
1623     else
1624         base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset));
1625 
1626     offset = get_offset_reg(table, base_index) + opr->reg.offset + comp;
1627     reg_index = get_reg_offset(table, offset);
1628 
1629     if (reg_index >= rs->table_sizes[table])
1630     {
1631         unsigned int wrap_size;
1632 
1633         if (table == PRES_REGTAB_CONST)
1634         {
1635             /* As it can be guessed from tests, offset into floating constant table is wrapped
1636              * to the nearest power of 2 and not to the actual table size. */
1637             for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1)
1638                 ;
1639         }
1640         else
1641         {
1642             wrap_size = rs->table_sizes[table];
1643         }
1644         WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n",
1645                 reg_index, table, wrap_size, rs->table_sizes[table]);
1646         reg_index %= wrap_size;
1647 
1648         if (reg_index >= rs->table_sizes[table])
1649             return 0.0;
1650 
1651         offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table);
1652     }
1653 
1654     return exec_get_reg_value(rs, table, offset);
1655 }
1656 
1657 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg,
1658         unsigned int comp, double res)
1659 {
1660     regstore_set_double(rs, reg->table, reg->offset + comp, res);
1661 }
1662 
1663 #define ARGS_ARRAY_SIZE 8
1664 static HRESULT execute_preshader(struct d3dx_preshader *pres)
1665 {
1666     unsigned int i, j, k;
1667     double args[ARGS_ARRAY_SIZE];
1668     double res;
1669 
1670     for (i = 0; i < pres->ins_count; ++i)
1671     {
1672         const struct d3dx_pres_ins *ins;
1673         const struct op_info *oi;
1674 
1675         ins = &pres->ins[i];
1676         oi = &pres_op_info[ins->op];
1677         if (oi->func_all_comps)
1678         {
1679             if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
1680             {
1681                 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
1682                 return E_FAIL;
1683             }
1684             for (k = 0; k < oi->input_count; ++k)
1685                 for (j = 0; j < ins->component_count; ++j)
1686                     args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k],
1687                             ins->scalar_op && !k ? 0 : j);
1688             res = oi->func(args, ins->component_count);
1689 
1690             /* only 'dot' instruction currently falls here */
1691             exec_set_arg(&pres->regs, &ins->output.reg, 0, res);
1692         }
1693         else
1694         {
1695             for (j = 0; j < ins->component_count; ++j)
1696             {
1697                 for (k = 0; k < oi->input_count; ++k)
1698                     args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j);
1699                 res = oi->func(args, ins->component_count);
1700                 exec_set_arg(&pres->regs, &ins->output.reg, j, res);
1701             }
1702         }
1703     }
1704     return D3D_OK;
1705 }
1706 
1707 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version)
1708 {
1709     unsigned int i;
1710 
1711     if (update_version == ULONG64_MAX)
1712         update_version = ctab->update_version;
1713     for (i = 0; i < ctab->input_count; ++i)
1714     {
1715         if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]),
1716                 update_version))
1717             return TRUE;
1718     }
1719     return FALSE;
1720 }
1721 
1722 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version)
1723 {
1724     return is_const_tab_input_dirty(&peval->pres.inputs, update_version)
1725             || is_const_tab_input_dirty(&peval->shader_inputs, update_version);
1726 }
1727 
1728 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param,
1729         void *param_value)
1730 {
1731     HRESULT hr;
1732     unsigned int i;
1733     unsigned int elements, elements_param, elements_table;
1734     float *oc;
1735 
1736     TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
1737 
1738     if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX))
1739     {
1740         set_constants(&peval->pres.regs, &peval->pres.inputs,
1741                 next_update_version(peval->version_counter),
1742                 NULL, NULL, peval->param_type, FALSE, FALSE);
1743 
1744         if (FAILED(hr = execute_preshader(&peval->pres)))
1745             return hr;
1746     }
1747 
1748     elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]);
1749     elements_param = param->bytes / sizeof(unsigned int);
1750     elements = min(elements_table, elements_param);
1751     oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
1752     for (i = 0; i < elements; ++i)
1753         set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
1754     return D3D_OK;
1755 }
1756 
1757 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1758         struct d3dx_param_eval *peval, BOOL update_all)
1759 {
1760     HRESULT hr;
1761     struct d3dx_preshader *pres = &peval->pres;
1762     struct d3dx_regstore *rs = &pres->regs;
1763     ULONG64 new_update_version = next_update_version(peval->version_counter);
1764     BOOL pres_dirty = FALSE;
1765 
1766     TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
1767 
1768     if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX))
1769     {
1770         set_constants(rs, &pres->inputs, new_update_version,
1771                 NULL, NULL, peval->param_type, FALSE, FALSE);
1772         if (FAILED(hr = execute_preshader(pres)))
1773             return hr;
1774         pres_dirty = TRUE;
1775     }
1776 
1777     return set_constants(rs, &peval->shader_inputs, new_update_version,
1778             manager, device, peval->param_type, update_all, pres_dirty);
1779 }
1780