xref: /reactos/dll/directx/wine/d3dx9_36/preshader.c (revision a6726659)
1 /*
2  * Copyright 2016 Paul Gofman
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17  */
18 
19 #include "config.h"
20 #include "wine/port.h"
21 
22 #include "d3dx9_private.h"
23 
24 #include <float.h>
25 #include <assert.h>
26 
27 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
28 
29 /* ReactOS FIXME: Insect */
30 #define fmin min
31 #define fmax max
32 
/* Preshader operations.
 *
 * parse_pres_ins() stores the index of the matching pres_op_info[] row in
 * d3dx_pres_ins.op, so the enumerators must stay in the same order as the
 * rows of that table. */
enum pres_ops
{
    PRESHADER_OP_NOP = 0,

    /* one input */
    PRESHADER_OP_MOV,
    PRESHADER_OP_NEG,
    PRESHADER_OP_RCP,
    PRESHADER_OP_FRC,
    PRESHADER_OP_EXP,
    PRESHADER_OP_LOG,
    PRESHADER_OP_RSQ,
    PRESHADER_OP_SIN,
    PRESHADER_OP_COS,
    PRESHADER_OP_ASIN,
    PRESHADER_OP_ACOS,
    PRESHADER_OP_ATAN,

    /* two inputs */
    PRESHADER_OP_MIN,
    PRESHADER_OP_MAX,
    PRESHADER_OP_LT,
    PRESHADER_OP_GE,
    PRESHADER_OP_ADD,
    PRESHADER_OP_MUL,
    PRESHADER_OP_ATAN2,
    PRESHADER_OP_DIV,

    /* three inputs */
    PRESHADER_OP_CMP,

    /* dot products */
    PRESHADER_OP_DOT,
    PRESHADER_OP_DOTSWIZ6,
    PRESHADER_OP_DOTSWIZ8,
};
61 
/* Signature shared by all operation implementations below: args points to the
 * input values and n is a count supplied by the caller. Of the implementations
 * in this file only pres_dot() actually reads n. */
typedef double (*pres_op_func)(double *args, int n);
63 
64 static double to_signed_nan(double v)
65 {
66     static const union
67     {
68         ULONG64 ulong64_value;
69         double double_value;
70     }
71     signed_nan =
72     {
73         0xfff8000000000000
74     };
75 
76     return isnan(v) ? signed_nan.double_value : v;
77 }
78 
/* mov: return the first input unchanged. */
static double pres_mov(double *args, int n)
{
    (void)n;
    return *args;
}
/* add: sum of the first two inputs. */
static double pres_add(double *args, int n)
{
    const double lhs = args[0], rhs = args[1];

    (void)n;
    return lhs + rhs;
}
/* mul: product of the first two inputs. */
static double pres_mul(double *args, int n)
{
    const double lhs = args[0], rhs = args[1];

    (void)n;
    return lhs * rhs;
}
/* dot: args holds two vectors of n values back to back (args[0..n-1] and
 * args[n..2n-1]); returns the sum of their pairwise products. */
static double pres_dot(double *args, int n)
{
    double acc = 0.0;
    int k = 0;

    while (k < n)
    {
        acc += args[k] * args[n + k];
        ++k;
    }
    return acc;
}
92 
/* d3ds_dotswiz with six inputs: dot product of two 3-component vectors.
 * The n argument is ignored. */
static double pres_dotswiz6(double *args, int n)
{
    const double dot3 = pres_dot(args, 3);

    (void)n;
    return dot3;
}
97 
/* d3ds_dotswiz with eight inputs: dot product of two 4-component vectors.
 * The n argument is ignored. */
static double pres_dotswiz8(double *args, int n)
{
    const double dot4 = pres_dot(args, 4);

    (void)n;
    return dot4;
}
102 
/* neg: negated first input. */
static double pres_neg(double *args, int n)
{
    const double value = args[0];

    (void)n;
    return -value;
}
/* rcp: reciprocal of the first input (plain floating point division, so a
 * zero input is not special-cased here). */
static double pres_rcp(double *args, int n)
{
    const double denom = args[0];

    (void)n;
    return 1.0 / denom;
}
/* lt: 1.0 when args[0] < args[1], otherwise 0.0. */
static double pres_lt(double *args, int n)
{
    (void)n;
    if (args[0] < args[1])
        return 1.0;
    return 0.0;
}
/* ge: 1.0 when args[0] >= args[1], otherwise 0.0. */
static double pres_ge(double *args, int n)
{
    (void)n;
    if (args[0] >= args[1])
        return 1.0;
    return 0.0;
}
107 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
108 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
109 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
/* cmp: select args[1] when args[0] >= 0.0, otherwise args[2]. */
static double pres_cmp(double *args, int n)
{
    (void)n;
    if (args[0] >= 0.0)
        return args[1];
    else
        return args[2];
}
111 static double pres_sin(double *args, int n) {return sin(args[0]);}
112 static double pres_cos(double *args, int n) {return cos(args[0]);}
113 static double pres_rsq(double *args, int n)
114 {
115     double v;
116 
117     v = fabs(args[0]);
118     if (v == 0.0)
119         return INFINITY;
120     else
121         return 1.0 / sqrt(v);
122 }
123 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
124 static double pres_log(double *args, int n)
125 {
126     double v;
127 
128     v = fabs(args[0]);
129     if (v == 0.0)
130         return 0.0;
131     else
132 #ifdef HAVE_LOG2
133         return log2(v);
134 #else
135         return log(v) / log(2);
136 #endif
137 }
/* asin: arc sine of the first input; a NaN result is normalized
 * through to_signed_nan(). */
static double pres_asin(double *args, int n)
{
    const double angle = asin(args[0]);

    (void)n;
    return to_signed_nan(angle);
}
/* acos: arc cosine of the first input; a NaN result is normalized
 * through to_signed_nan(). */
static double pres_acos(double *args, int n)
{
    const double angle = acos(args[0]);

    (void)n;
    return to_signed_nan(angle);
}
140 static double pres_atan(double *args, int n) {return atan(args[0]);}
141 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);}
142 
/* Tests show that the 'div' operation always yields 0. The compiler never seems
 * to emit it (it generates rcp + mul instead), so native d3dx most likely does
 * not implement it. Both inputs are therefore ignored. */
static double pres_div(double *args, int n)
{
    (void)args;
    (void)n;
    return 0.0;
}
146 
/* Fields of the first word of a preshader instruction, as decoded by
 * parse_pres_ins(): bits 20..30 hold the opcode, bit 31 is the scalar flag
 * and the low 16 bits hold the component count. */
#define PRES_OPCODE_MASK 0x7ff00000
#define PRES_OPCODE_SHIFT 20
#define PRES_SCALAR_FLAG 0x80000000
#define PRES_NCOMP_MASK  0x0000ffff

/* Four-character codes with the first character in the lowest byte:
 * 'PRES', 'CLIT', 'FXLC' and 'PRSI' respectively. */
#define FOURCC_PRES 0x53455250
#define FOURCC_CLIT 0x54494c43
#define FOURCC_FXLC 0x434c5846
#define FOURCC_PRSI 0x49535250
/* Upper 16 bits spell 'F', 'X' (0x46, 0x58); its use is outside this part of
 * the file - presumably a version/signature token, TODO confirm. */
#define PRES_SIGN 0x46580000
157 
/* Static description of one preshader operation (one row of pres_op_info). */
struct op_info
{
    /* Opcode value as extracted from the instruction word with
     * PRES_OPCODE_MASK / PRES_OPCODE_SHIFT. */
    unsigned int opcode;
    /* Mnemonic, used in diagnostics. */
    char mnem[16];
    /* Number of input operands; parse_pres_ins() matches on opcode AND this
     * count, which is how the two "d3ds_dotswiz" rows are told apart. */
    unsigned int input_count;
    /* When set, parse_pres_ins() treats the instruction as writing a single
     * output component regardless of component_count. Presumably func then
     * consumes all input components in one call - TODO confirm in the executor. */
    BOOL func_all_comps;
    /* Implementation of the operation; NULL for "nop". */
    pres_op_func func;
};
166 
/* Operation table. Rows are in enum pres_ops order (see the trailing comment
 * on each row): parse_pres_ins() searches this table by opcode and input count
 * and stores the row index as the instruction's op.
 * Columns: opcode, mnemonic, input count, func_all_comps, implementation. */
static const struct op_info pres_op_info[] =
{
    {0x000, "nop", 0, 0, NULL    }, /* PRESHADER_OP_NOP */
    {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
    {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
    {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
    {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
    {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
    {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */
    {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
    {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
    {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
    {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */
    {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */
    {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */
    {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
    {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
    {0x202, "lt",  2, 0, pres_lt }, /* PRESHADER_OP_LT  */
    {0x203, "ge",  2, 0, pres_ge }, /* PRESHADER_OP_GE  */
    {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
    {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
    {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */
    {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */
    {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
    {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
    /* Same opcode twice: the rows differ only in input count (6 vs. 8). */
    {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
    {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
};
195 
/* Storage type of the components held in a register table.
 * PRES_VT_COUNT is not a real type: it is the number of types and doubles as
 * the "unsupported" result of table_type_from_param_type(). */
enum pres_value_type
{
    PRES_VT_FLOAT = 0,
    PRES_VT_DOUBLE,
    PRES_VT_INT,
    PRES_VT_BOOL,

    PRES_VT_COUNT
};
204 
/* Per register table: size in bytes of one component and the type it is
 * stored as. Indexed by the register table id (see the comment on each row). */
static const struct
{
    unsigned int component_size;
    enum pres_value_type type;
}
table_info[] =
{
    {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
    {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
    {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
    {sizeof(BOOL),   PRES_VT_BOOL  }, /* PRES_REGTAB_OBCONST */
    {sizeof(int),    PRES_VT_INT,  }, /* PRES_REGTAB_OICONST */
    /* TODO: use double precision for 64 bit */
    {sizeof(float),  PRES_VT_FLOAT }  /* PRES_REGTAB_TEMP */
};
220 
/* Short textual prefix for each register table, in the same order as
 * table_info, followed by one extra "(null)" entry - presumably for
 * PRES_REGTAB_COUNT, i.e. "no table"; TODO confirm against the users. */
static const char *table_symbol[] =
{
    "imm", "c", "oc", "ob", "oi", "r", "(null)",
};
225 
/* Maps a D3DX register set (see the comment on each row) to a register table,
 * with float constants going to PRES_REGTAB_CONST. Judging by the name this is
 * the mapping used for preshader constant tables - its users are outside this
 * part of the file. PRES_REGTAB_COUNT marks a register set with no table. */
static const enum pres_reg_tables pres_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_CONST,    /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
};
233 
/* Same mapping as pres_regset2table except that float constants go to
 * PRES_REGTAB_OCONST. Judging by the name this is the variant used for shader
 * constant tables - its users are outside this part of the file.
 * PRES_REGTAB_COUNT marks a register set with no table. */
static const enum pres_reg_tables shad_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_OCONST,   /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
};
241 
/* Reference to one component in a register table. */
struct d3dx_pres_reg
{
    /* Register table the component lives in. */
    enum pres_reg_tables table;
    /* offset is component index, not register index, e. g.
       offset for component c3.y is 13 (3 * 4 + 1) */
    unsigned int offset;
};
249 
/* One instruction operand as produced by parse_pres_arg(). */
struct d3dx_pres_operand
{
    /* The register being addressed. */
    struct d3dx_pres_reg reg;
    /* Index register for relative addressing; its table is set to
     * PRES_REGTAB_COUNT when the operand is not relatively addressed. */
    struct d3dx_pres_reg index_reg;
};
255 
/* Capacity of d3dx_pres_ins.inputs; equals the largest input count in pres_op_info. */
#define MAX_INPUTS_COUNT 8

/* One decoded preshader instruction (filled in by parse_pres_ins()). */
struct d3dx_pres_ins
{
    /* Index of the matching row in pres_op_info. */
    enum pres_ops op;
    /* first input argument is scalar,
       scalar component is propagated */
    BOOL scalar_op;
    /* Component count from the instruction word; the parser accepts 1..4. */
    unsigned int component_count;
    /* Input operands; only the first pres_op_info[op].input_count are parsed. */
    struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
    /* Destination operand; relative addressing is rejected by the parser. */
    struct d3dx_pres_operand output;
};
268 
/* Layout information for uploading one parameter into a constant range,
 * computed by get_const_upload_info(). */
struct const_upload_info
{
    /* TRUE when parameter and constant are matrices of opposite
     * row/column-major classes. */
    BOOL transpose;
    /* Parameter dimensions seen from the constant's side: for a column-major
     * constant major = columns and minor = rows, otherwise the reverse. */
    unsigned int major, minor;
    /* Number of table components between the starts of consecutive major lines. */
    unsigned int major_stride;
    /* Number of complete major lines covered by the constant's registers. */
    unsigned int major_count;
    /* Total number of components to upload:
     * major_count * minor + minor_remainder. */
    unsigned int count;
    /* Components of a trailing, incomplete major line (only non-zero for
     * tables with one component per register). */
    unsigned int minor_remainder;
};
278 
279 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type)
280 {
281     switch (type)
282     {
283         case D3DXPT_FLOAT:
284             return PRES_VT_FLOAT;
285         case D3DXPT_INT:
286             return PRES_VT_INT;
287         case D3DXPT_BOOL:
288             return PRES_VT_BOOL;
289         default:
290             FIXME("Unsupported type %u.\n", type);
291             return PRES_VT_COUNT;
292     }
293 }
294 
295 static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
296 {
297     return table == PRES_REGTAB_OBCONST ? offset : offset >> 2;
298 }
299 
300 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx)
301 {
302     return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2;
303 }
304 
/* Number of components in one register of the given table, i.e. the component
 * offset of register 1. */
static unsigned int get_reg_components(unsigned int table)
{
    const unsigned int first_register = 1;

    return get_offset_reg(table, first_register);
}
309 
/* Number of bits in one unsigned int (assuming 8-bit bytes). The users of this
 * macro are outside this part of the file. */
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
311 
312 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
313 {
314     unsigned int size;
315 
316     size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size;
317     if (size)
318     {
319         rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
320         if (!rs->tables[table])
321             return E_OUTOFMEMORY;
322     }
323     return D3D_OK;
324 }
325 
326 static void regstore_free_tables(struct d3dx_regstore *rs)
327 {
328     unsigned int i;
329 
330     for (i = 0; i < PRES_REGTAB_COUNT; ++i)
331     {
332         HeapFree(GetProcessHeap(), 0, rs->tables[i]);
333     }
334 }
335 
336 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data,
337         unsigned int start_offset, unsigned int count)
338 {
339     BYTE *dst = rs->tables[table];
340     const BYTE *src = data;
341     unsigned int size;
342 
343     dst += start_offset * table_info[table].component_size;
344     size = count * table_info[table].component_size;
345     assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst));
346     memcpy(dst, src, size);
347 }
348 
349 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
350 {
351     BYTE *p;
352 
353     p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
354     switch (table_info[table].type)
355     {
356         case PRES_VT_FLOAT:
357             return *(float *)p;
358         case PRES_VT_DOUBLE:
359             return *(double *)p;
360         default:
361             FIXME("Unexpected preshader input from table %u.\n", table);
362             return NAN;
363     }
364 }
365 
366 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
367 {
368     BYTE *p;
369 
370     p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
371     switch (table_info[table].type)
372     {
373         case PRES_VT_FLOAT : *(float *)p = v; break;
374         case PRES_VT_DOUBLE: *(double *)p = v; break;
375         case PRES_VT_INT   : *(int *)p = lrint(v); break;
376         case PRES_VT_BOOL  : *(BOOL *)p = !!v; break;
377         default:
378             FIXME("Bad type %u.\n", table_info[table].type);
379             break;
380     }
381 }
382 
/* Trace a bytecode buffer as 32-bit hex words, eight per line.
 * size is given in bytes; a trailing partial word is ignored. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *words = (unsigned int *)data;
    const unsigned int word_count = size / sizeof(*words);
    unsigned int pos = 0;

    while (pos < word_count)
    {
        const unsigned int left = word_count - pos;
        const unsigned int row_len = left < 8 ? left : 8;
        unsigned int col;

        for (col = 0; col < row_len; ++col)
            TRACE("0x%08x,", words[pos + col]);
        TRACE("\n");
        pos += row_len;
    }
}
399 
/* Walk a run of comment tokens and look for the section tagged with fourcc.
 *
 * A comment token has 0xfffe in its low 16 bits and the section length in
 * words (tag word included) in its high 16 bits; the tag follows immediately.
 * ptr/count describe the words available.
 *
 * On success *size receives the section length and the word just after the tag
 * is returned. Returns NULL when the section is not found, or when the run of
 * comments ends, is malformed or is truncated. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    unsigned int *token = ptr;
    unsigned int remaining = count;

    for (;;)
    {
        unsigned int section_len, span;

        /* Provide at least one value in comment section on non-NULL return. */
        if (remaining <= 2 || (*token & 0xffff) != 0xfffe)
            return NULL;

        section_len = *token >> 16;
        span = section_len + 1;
        if (!section_len || span > remaining)
            return NULL;

        if (token[1] == fourcc)
        {
            *size = section_len;
            return token + 2;
        }

        remaining -= span;
        token += span;
    }
}
421 
422 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg)
423 {
424     static const enum pres_reg_tables reg_table[8] =
425     {
426         PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
427         PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
428     };
429 
430     if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
431     {
432         FIXME("Unsupported register table %#x.\n", *ptr);
433         return NULL;
434     }
435 
436     reg->table = reg_table[*ptr++];
437     reg->offset = *ptr++;
438     return ptr;
439 }
440 
441 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
442 {
443     if (count < 3 || (*ptr && count < 5))
444     {
445         WARN("Byte code buffer ends unexpectedly, count %u.\n", count);
446         return NULL;
447     }
448 
449     if (*ptr)
450     {
451         if (*ptr != 1)
452         {
453             FIXME("Unknown relative addressing flag, word %#x.\n", *ptr);
454             return NULL;
455         }
456         ptr = parse_pres_reg(ptr + 1, &opr->index_reg);
457         if (!ptr)
458             return NULL;
459     }
460     else
461     {
462         opr->index_reg.table = PRES_REGTAB_COUNT;
463         ++ptr;
464     }
465 
466     ptr = parse_pres_reg(ptr, &opr->reg);
467 
468     if (opr->reg.table == PRES_REGTAB_OBCONST)
469         opr->reg.offset /= 4;
470     return ptr;
471 }
472 
473 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
474 {
475     unsigned int ins_code, ins_raw;
476     unsigned int input_count;
477     unsigned int i;
478 
479     if (count < 2)
480     {
481         WARN("Byte code buffer ends unexpectedly.\n");
482         return NULL;
483     }
484 
485     ins_raw = *ptr++;
486     ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
487     ins->component_count = ins_raw & PRES_NCOMP_MASK;
488     ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);
489 
490     if (ins->component_count < 1 || ins->component_count > 4)
491     {
492         FIXME("Unsupported number of components %u.\n", ins->component_count);
493         return NULL;
494     }
495     input_count = *ptr++;
496     count -= 2;
497     for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
498         if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
499             break;
500     if (i == ARRAY_SIZE(pres_op_info))
501     {
502         FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
503         return NULL;
504     }
505     ins->op = i;
506     if (input_count > ARRAY_SIZE(ins->inputs))
507     {
508         FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
509                 pres_op_info[i].mnem);
510         return NULL;
511     }
512     for (i = 0; i < input_count; ++i)
513     {
514         unsigned int *p;
515 
516         p = parse_pres_arg(ptr, count, &ins->inputs[i]);
517         if (!p)
518             return NULL;
519         count -= p - ptr;
520         ptr = p;
521     }
522     ptr = parse_pres_arg(ptr, count, &ins->output);
523     if (ins->output.index_reg.table != PRES_REGTAB_COUNT)
524     {
525         FIXME("Relative addressing in output register not supported.\n");
526         return NULL;
527     }
528     if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset
529             + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1))
530             != get_reg_offset(ins->output.reg.table, ins->output.reg.offset))
531     {
532         FIXME("Instructions outputting multiple registers are not supported.\n");
533         return NULL;
534     }
535     return ptr;
536 }
537 
538 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc,
539         WORD *constantinfo_reserved)
540 {
541     const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc);
542 
543     if (!constant)
544     {
545         FIXME("Could not get constant desc.\n");
546         if (constantinfo_reserved)
547             *constantinfo_reserved = 0;
548         return D3DERR_INVALIDCALL;
549     }
550     *desc = constant->desc;
551     if (constantinfo_reserved)
552         *constantinfo_reserved = constant->constantinfo_reserved;
553     return D3D_OK;
554 }
555 
556 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set,
557         struct const_upload_info *info)
558 {
559     struct d3dx_parameter *param = const_set->param;
560     unsigned int table = const_set->table;
561 
562     info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
563             || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
564     if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
565     {
566         info->major = param->columns;
567         info->minor = param->rows;
568     }
569     else
570     {
571         info->major = param->rows;
572         info->minor = param->columns;
573     }
574 
575     if (get_reg_components(table) == 1)
576     {
577         unsigned int const_length = get_offset_reg(table, const_set->register_count);
578 
579         info->major_stride = info->minor;
580         info->major_count = const_length / info->major_stride;
581         info->minor_remainder = const_length % info->major_stride;
582     }
583     else
584     {
585         info->major_stride = get_reg_components(table);
586         info->major_count = const_set->register_count;
587         info->minor_remainder = 0;
588     }
589     info->count = info->major_count * info->minor + info->minor_remainder;
590 }
591 
592 #define INITIAL_CONST_SET_SIZE 16
593 
594 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
595 {
596     if (const_tab->const_set_count >= const_tab->const_set_size)
597     {
598         unsigned int new_size;
599         struct d3dx_const_param_eval_output *new_alloc;
600 
601         if (!const_tab->const_set_size)
602         {
603             new_size = INITIAL_CONST_SET_SIZE;
604             new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size);
605             if (!new_alloc)
606             {
607                 ERR("Out of memory.\n");
608                 return E_OUTOFMEMORY;
609             }
610         }
611         else
612         {
613             new_size = const_tab->const_set_size * 2;
614             new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
615                     sizeof(*const_tab->const_set) * new_size);
616             if (!new_alloc)
617             {
618                 ERR("Out of memory.\n");
619                 return E_OUTOFMEMORY;
620             }
621         }
622         const_tab->const_set = new_alloc;
623         const_tab->const_set_size = new_size;
624     }
625     const_tab->const_set[const_tab->const_set_count++] = *set;
626     return D3D_OK;
627 }
628 
629 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab,
630         struct d3dx_preshader *pres)
631 {
632     unsigned int i;
633     struct d3dx_const_param_eval_output const_set = {NULL};
634 
635     for (i = 0; i < pres->ins_count; ++i)
636     {
637         const struct d3dx_pres_ins *ins = &pres->ins[i];
638         const struct d3dx_pres_reg *reg = &ins->output.reg;
639 
640         if (reg->table == PRES_REGTAB_TEMP)
641             continue;
642 
643         const_set.register_index = get_reg_offset(reg->table, reg->offset);
644         const_set.register_count = 1;
645         const_set.table = reg->table;
646         const_set.constant_class = D3DXPC_FORCE_DWORD;
647         const_set.element_count = 1;
648         append_const_set(const_tab, &const_set);
649     }
650 }
651 
652 static int compare_const_set(const void *a, const void *b)
653 {
654     const struct d3dx_const_param_eval_output *r1 = a;
655     const struct d3dx_const_param_eval_output *r2 = b;
656 
657     if (r1->table != r2->table)
658         return r1->table - r2->table;
659     return r1->register_index - r2->register_index;
660 }
661 
/* Collapse runs of adjacent const set entries, starting at position index, that
 * describe one contiguous block of registers fed from one contiguous block of
 * parameter data, so that each run is represented by its first entry only.
 *
 * param is the parent parameter whose children produced the entries; it is used
 * for tracing and, for a direct-copy run that starts at index, may replace the
 * first entry's parameter.
 *
 * Returns D3D_OK, or D3DERR_INVALIDCALL if param would become the source of a
 * direct-copy run but has an unsupported type. */
static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab,
        struct d3dx_parameter *param, unsigned int index)
{
    unsigned int i, start_index = index;
    DWORD *current_data;
    enum pres_reg_tables current_table;
    unsigned int current_start_offset, element_count;
    struct d3dx_const_param_eval_output *first_const;

    if (!const_tab->const_set_count)
        return D3D_OK;

    while (index < const_tab->const_set_count - 1)
    {
        /* Start a new run at index; current_data / current_start_offset track
         * where the next entry must begin in parameter data / register space
         * to be contiguous with the run so far. */
        first_const = &const_tab->const_set[index];
        current_data = first_const->param->data;
        current_table = first_const->table;
        current_start_offset = get_offset_reg(current_table, first_const->register_index);
        element_count = 0;
        for (i = index; i < const_tab->const_set_count; ++i)
        {
            struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i];
            unsigned int count = get_offset_reg(const_set->table,
                    const_set->register_count * const_set->element_count);
            unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index);

            /* An entry extends the run only if it is in the same table, starts
             * exactly where the run ends (registers and data), uses the same
             * copy mode and, unless directly copied, has the same shape as the
             * first entry. The element count may only differ for the very last
             * entry of the whole array. */
            if (!(const_set->table == current_table && current_start_offset == start_offset
                    && const_set->direct_copy == first_const->direct_copy
                    && current_data == const_set->param->data
                    && (const_set->direct_copy || (first_const->param->type == const_set->param->type
                    && first_const->param->class == const_set->param->class
                    && first_const->param->columns == const_set->param->columns
                    && first_const->param->rows == const_set->param->rows
                    && first_const->register_count == const_set->register_count
                    && (i == const_tab->const_set_count - 1
                    || first_const->param->element_count == const_set->param->element_count)))))
                break;

            current_start_offset += count;
            current_data += const_set->direct_copy ? count : const_set->param->rows
                    * const_set->param->columns * const_set->element_count;
            element_count += const_set->element_count;
        }

        /* i is now one past the last entry of the run; merge if it holds more
         * than one entry. */
        if (i > index + 1)
        {
            TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index,
                    debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy);

            first_const->element_count = element_count;
            if (first_const->direct_copy)
            {
                /* A direct copy is described as a single element spanning all
                 * merged registers. */
                first_const->element_count = 1;
                if (index == start_index
                        && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT))
                {
                    if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
                        return D3DERR_INVALIDCALL;
                    first_const->param = param;
                }
                first_const->register_count = get_reg_offset(current_table, current_start_offset)
                        - first_const->register_index;
            }
            /* Drop the entries that were folded into first_const. */
            memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i],
                    sizeof(*const_tab->const_set) * (const_tab->const_set_count - i));
            const_tab->const_set_count -= i - index - 1;
        }
        else
        {
            TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n",
                    const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy);
        }
        /* NOTE(review): after a merge the unmerged tail has been moved down to
         * index + 1, yet the next run starts at the old i, so the entries now
         * at index + 1 .. i - 1 are never considered as run starts. This looks
         * like it can only miss merge opportunities - confirm it is intended. */
        index = i;
    }
    return D3D_OK;
}
738 
/* Create the const set entries needed to upload effect parameter param into
 * shader constant hc.
 *
 * Arrays and structs are handled recursively, element by element or member by
 * member, after which the entries added for the children are merged where
 * possible. A leaf parameter produces at most one entry.
 *
 * Returns D3D_OK on success (including the case where nothing has to be
 * uploaded), D3DERR_INVALIDCALL when constant and parameter do not match or use
 * unsupported types, or the error from append_const_set(). */
static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
        D3DXHANDLE hc, struct d3dx_parameter *param)
{
    D3DXCONSTANT_DESC desc;
    unsigned int const_count, param_count, i;
    BOOL get_element;
    struct d3dx_const_param_eval_output const_set;
    struct const_upload_info info;
    enum pres_value_type table_type;
    HRESULT hr;

    if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL)))
        return D3DERR_INVALIDCALL;

    /* Decide whether the children are array elements or struct members and
     * check that constant and parameter agree on how many there are. */
    if (param->element_count)
    {
        param_count = param->element_count;
        const_count = desc.Elements;
        get_element = TRUE;
    }
    else
    {
        if (desc.Elements > 1)
        {
            FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
            return D3DERR_INVALIDCALL;
        }
        param_count = param->member_count;
        const_count = desc.StructMembers;
        get_element = FALSE;
    }
    if (const_count != param_count)
    {
        FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
                param_count, const_count);
        return D3DERR_INVALIDCALL;
    }
    if (const_count)
    {
        HRESULT ret = D3D_OK;
        D3DXHANDLE hc_element;
        /* Position of the first entry the children are going to add. */
        unsigned int index = const_tab->const_set_count;

        /* Every child is processed even if an earlier one failed; the last
         * failure is what gets reported. */
        for (i = 0; i < const_count; ++i)
        {
            if (get_element)
                hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
            else
                hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
            if (!hc_element)
            {
                FIXME("Could not get constant.\n");
                hr = D3DERR_INVALIDCALL;
            }
            else
            {
                hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
            }
            if (FAILED(hr))
                ret = hr;
        }
        if (FAILED(ret))
            return ret;
        return merge_const_set_entries(const_tab, param, index);
    }

    /* Leaf parameter from here on. */
    TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
            debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
    TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
            debugstr_a(param->name), param->rows, param->columns, param->class,
            param->flags, param->bytes);

    const_set.element_count = 1;
    const_set.param = param;
    const_set.constant_class = desc.Class;
    if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
    {
        FIXME("Unknown register set %u.\n", desc.RegisterSet);
        return D3DERR_INVALIDCALL;
    }
    const_set.register_index = desc.RegisterIndex;
    const_set.table = const_tab->regset2table[desc.RegisterSet];
    if (const_set.table >= PRES_REGTAB_COUNT)
    {
        ERR("Unexpected register set %u.\n", desc.RegisterSet);
        return D3DERR_INVALIDCALL;
    }
    /* Both the table components and the parameter components are expected to
     * be 32 bits wide.
     * NOTE(review): the second assert divides by rows * columns - presumably
     * never zero for a leaf parameter; confirm. */
    assert(table_info[const_set.table].component_size == sizeof(unsigned int));
    assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int));
    const_set.register_count = desc.RegisterCount;
    table_type = table_info[const_set.table].type;
    get_const_upload_info(&const_set, &info);
    if (!info.count)
    {
        TRACE("%s has zero count, skipping.\n", debugstr_a(param->name));
        return D3D_OK;
    }

    if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
        return D3DERR_INVALIDCALL;

    /* The data can be copied as one block when no type conversion or transpose
     * is needed, the lines are packed exactly as the registers are, the
     * registers are completely filled and the parameter holds enough data. */
    const_set.direct_copy = table_type_from_param_type(param->type) == table_type
            && !info.transpose && info.minor == info.major_stride
            && info.count == get_offset_reg(const_set.table, const_set.register_count)
            && info.count * sizeof(unsigned int) <= param->bytes;
    if (info.minor_remainder && !const_set.direct_copy && !info.transpose)
        FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n",
                debugstr_a(param->name));

    /* The constant must not ask for more lines than the parameter has. */
    if (info.major_count > info.major
            || (info.major_count == info.major && info.minor_remainder))
    {
        WARN("Constant dimensions exceed parameter size.\n");
        return D3DERR_INVALIDCALL;
    }

    if (FAILED(hr = append_const_set(const_tab, &const_set)))
        return hr;

    return D3D_OK;
}
860 
861 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out,
862         struct d3dx9_base_effect *base, const char **skip_constants,
863         unsigned int skip_constants_count, struct d3dx_preshader *pres)
864 {
865     ID3DXConstantTable *ctab;
866     D3DXCONSTANT_DESC *cdesc;
867     struct d3dx_parameter **inputs_param;
868     D3DXCONSTANTTABLE_DESC desc;
869     HRESULT hr;
870     D3DXHANDLE hc;
871     unsigned int i, j;
872 
873     hr = D3DXGetShaderConstantTable(byte_code, &ctab);
874     if (FAILED(hr) || !ctab)
875     {
876         TRACE("Could not get CTAB data, hr %#x.\n", hr);
877         /* returning OK, shaders and preshaders without CTAB are valid */
878         return D3D_OK;
879     }
880     if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
881     {
882         FIXME("Could not get CTAB desc, hr %#x.\n", hr);
883         goto cleanup;
884     }
885 
886     out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
887     out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
888     if (!cdesc || !inputs_param)
889     {
890         hr = E_OUTOFMEMORY;
891         goto cleanup;
892     }
893 
894     for (i = 0; i < desc.Constants; ++i)
895     {
896         unsigned int index = out->input_count;
897         WORD constantinfo_reserved;
898 
899         hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
900         if (!hc)
901         {
902             FIXME("Null constant handle.\n");
903             goto cleanup;
904         }
905         if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved)))
906             goto cleanup;
907         inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name);
908         if (!inputs_param[index])
909         {
910             WARN("Could not find parameter %s in effect.\n", cdesc[index].Name);
911             continue;
912         }
913         if (cdesc[index].Class == D3DXPC_OBJECT)
914         {
915             TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]);
916             if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT
917                     || !is_param_type_sampler(inputs_param[index]->type))
918             {
919                 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name));
920                 hr = D3DERR_INVALIDCALL;
921                 goto cleanup;
922             }
923             if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount)
924             {
925                 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name));
926                 hr = D3DERR_INVALIDCALL;
927                 goto cleanup;
928             }
929         }
930         if (!is_top_level_parameter(inputs_param[index]))
931         {
932             WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name));
933             hr = E_FAIL;
934             goto cleanup;
935         }
936 
937         for (j = 0; j < skip_constants_count; ++j)
938         {
939             if (!strcmp(cdesc[index].Name, skip_constants[j]))
940             {
941                 if (!constantinfo_reserved)
942                 {
943                     WARN("skip_constants parameter %s is not register bound.\n",
944                             cdesc[index].Name);
945                     hr = D3DERR_INVALIDCALL;
946                     goto cleanup;
947                 }
948                 TRACE("Skipping constant %s.\n", cdesc[index].Name);
949                 break;
950             }
951         }
952         if (j < skip_constants_count)
953             continue;
954         ++out->input_count;
955         if (inputs_param[index]->class == D3DXPC_OBJECT)
956             continue;
957         if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index])))
958             goto cleanup;
959     }
960     if (pres)
961         append_pres_const_sets_for_shader_input(out, pres);
962     if (out->const_set_count)
963     {
964         struct d3dx_const_param_eval_output *new_alloc;
965 
966         qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set);
967 
968         i = 0;
969         while (i < out->const_set_count - 1)
970         {
971             if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD
972                     && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD
973                     && out->const_set[i].table == out->const_set[i + 1].table
974                     && out->const_set[i].register_index + out->const_set[i].register_count
975                     >= out->const_set[i + 1].register_index)
976             {
977                 assert(out->const_set[i].register_index + out->const_set[i].register_count
978                         <= out->const_set[i + 1].register_index + 1);
979                 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1
980                         - out->const_set[i].register_index;
981                 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i])
982                         * (out->const_set_count - i - 2));
983                 --out->const_set_count;
984             }
985             else
986             {
987                 ++i;
988             }
989         }
990 
991         new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set,
992                 sizeof(*out->const_set) * out->const_set_count);
993         if (new_alloc)
994         {
995             out->const_set = new_alloc;
996             out->const_set_size = out->const_set_count;
997         }
998         else
999         {
1000             WARN("Out of memory.\n");
1001         }
1002     }
1003 cleanup:
1004     ID3DXConstantTable_Release(ctab);
1005     return hr;
1006 }
1007 
1008 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
1009 {
1010     if (table < PRES_REGTAB_COUNT)
1011         table_sizes[table] = max(table_sizes[table], max_register + 1);
1012 }
1013 
1014 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
1015 {
1016     unsigned int i, table, max_register;
1017 
1018     for (i = 0; i < ctab->input_count; ++i)
1019     {
1020         if (!ctab->inputs[i].RegisterCount)
1021             continue;
1022         max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
1023         table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1024         update_table_size(table_sizes, table, max_register);
1025     }
1026 }
1027 
1028 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
1029 {
1030     static const char *xyzw_str = "xyzw";
1031     unsigned int i, table;
1032 
1033     table = arg->reg.table;
1034     if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT)
1035     {
1036         TRACE("(");
1037         for (i = 0; i < component_count; ++i)
1038             TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
1039                     ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]);
1040         TRACE(")");
1041     }
1042     else
1043     {
1044         if (arg->index_reg.table == PRES_REGTAB_COUNT)
1045         {
1046             TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset));
1047         }
1048         else
1049         {
1050             unsigned int index_reg;
1051 
1052             index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset);
1053             TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset),
1054                     table_symbol[arg->index_reg.table], index_reg,
1055                     xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]);
1056         }
1057         for (i = 0; i < component_count; ++i)
1058             TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]);
1059     }
1060 }
1061 
1062 static void dump_registers(struct d3dx_const_tab *ctab)
1063 {
1064     unsigned int table, i;
1065 
1066     for (i = 0; i < ctab->input_count; ++i)
1067     {
1068         table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1069         TRACE("//   %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
1070                 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
1071     }
1072 }
1073 
1074 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
1075 {
1076     unsigned int i;
1077 
1078     TRACE("%s ", pres_op_info[ins->op].mnem);
1079     dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
1080     for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
1081     {
1082         TRACE(", ");
1083         dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
1084     }
1085     TRACE("\n");
1086 }
1087 
1088 static void dump_preshader(struct d3dx_preshader *pres)
1089 {
1090     unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4;
1091     const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED];
1092 
1093     if (immediate_count)
1094         TRACE("// Immediates:\n");
1095     for (i = 0; i < immediate_count; ++i)
1096     {
1097         if (!(i % 4))
1098             TRACE("// ");
1099         TRACE("%.8e", immediates[i]);
1100         if (i % 4 == 3)
1101             TRACE("\n");
1102         else
1103             TRACE(", ");
1104     }
1105     TRACE("// Preshader registers:\n");
1106     dump_registers(&pres->inputs);
1107     TRACE("preshader\n");
1108     for (i = 0; i < pres->ins_count; ++i)
1109         dump_ins(&pres->regs, &pres->ins[i]);
1110 }
1111 
1112 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base)
1113 {
1114     unsigned int *p;
1115     unsigned int i, j, const_count;
1116     double *dconst;
1117     HRESULT hr;
1118     unsigned int saved_word;
1119     unsigned int section_size;
1120 
1121     TRACE("Preshader version %#x.\n", *ptr & 0xffff);
1122 
1123     if (!count)
1124     {
1125         WARN("Unexpected end of byte code buffer.\n");
1126         return D3DXERR_INVALIDDATA;
1127     }
1128 
1129     p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
1130     if (p)
1131     {
1132         const_count = *p++;
1133         if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
1134         {
1135             WARN("Byte code buffer ends unexpectedly.\n");
1136             return D3DXERR_INVALIDDATA;
1137         }
1138         dconst = (double *)p;
1139     }
1140     else
1141     {
1142         const_count = 0;
1143         dconst = NULL;
1144     }
1145     TRACE("%u double constants.\n", const_count);
1146 
1147     p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
1148     if (!p)
1149     {
1150         WARN("Could not find preshader code.\n");
1151         return D3D_OK;
1152     }
1153     pres->ins_count = *p++;
1154     --section_size;
1155     if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
1156     {
1157         WARN("Invalid instruction count %u.\n", pres->ins_count);
1158         return D3DXERR_INVALIDDATA;
1159     }
1160     TRACE("%u instructions.\n", pres->ins_count);
1161     pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
1162     if (!pres->ins)
1163         return E_OUTOFMEMORY;
1164     for (i = 0; i < pres->ins_count; ++i)
1165     {
1166         unsigned int *ptr_next;
1167 
1168         ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
1169         if (!ptr_next)
1170             return D3DXERR_INVALIDDATA;
1171         section_size -= ptr_next - p;
1172         p = ptr_next;
1173     }
1174 
1175     pres->inputs.regset2table = pres_regset2table;
1176 
1177     saved_word = *ptr;
1178     *ptr = 0xfffe0000;
1179     hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL);
1180     *ptr = saved_word;
1181     if (FAILED(hr))
1182         return hr;
1183 
1184     if (const_count % get_reg_components(PRES_REGTAB_IMMED))
1185     {
1186         FIXME("const_count %u is not a multiple of %u.\n", const_count,
1187                 get_reg_components(PRES_REGTAB_IMMED));
1188         return D3DXERR_INVALIDDATA;
1189     }
1190     pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count);
1191 
1192     update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
1193     for (i = 0; i < pres->ins_count; ++i)
1194     {
1195         for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
1196         {
1197             enum pres_reg_tables table;
1198             unsigned int reg_idx;
1199 
1200             if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT)
1201             {
1202                 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0
1203                         : pres->ins[i].component_count - 1;
1204 
1205                 table = pres->ins[i].inputs[j].reg.table;
1206                 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset
1207                         + last_component_index);
1208             }
1209             else
1210             {
1211                 table = pres->ins[i].inputs[j].index_reg.table;
1212                 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset);
1213             }
1214             if (reg_idx >= pres->regs.table_sizes[table])
1215             {
1216                 /* Native accepts these broken preshaders. */
1217                 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n",
1218                         i, j, table, reg_idx);
1219                 return D3DXERR_INVALIDDATA;
1220             }
1221         }
1222         update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table,
1223                 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset));
1224     }
1225     if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
1226         return E_OUTOFMEMORY;
1227     regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);
1228 
1229     return D3D_OK;
1230 }
1231 
1232 HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
1233         D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter,
1234         const char **skip_constants, unsigned int skip_constants_count)
1235 {
1236     struct d3dx_param_eval *peval;
1237     unsigned int *ptr, *shader_ptr = NULL;
1238     unsigned int i;
1239     BOOL shader;
1240     unsigned int count, pres_size;
1241     HRESULT ret;
1242 
1243     TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
1244             base_effect, byte_code, byte_code_size, type, peval_out);
1245 
1246     count = byte_code_size / sizeof(unsigned int);
1247     if (!byte_code || !count)
1248     {
1249         *peval_out = NULL;
1250         return D3D_OK;
1251     }
1252 
1253     peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
1254     if (!peval)
1255     {
1256         ret = E_OUTOFMEMORY;
1257         goto err_out;
1258     }
1259     peval->version_counter = version_counter;
1260 
1261     peval->param_type = type;
1262     switch (type)
1263     {
1264         case D3DXPT_VERTEXSHADER:
1265         case D3DXPT_PIXELSHADER:
1266             shader = TRUE;
1267             break;
1268         default:
1269             shader = FALSE;
1270             break;
1271     }
1272     peval->shader_inputs.regset2table = shad_regset2table;
1273 
1274     ptr = (unsigned int *)byte_code;
1275     if (shader)
1276     {
1277         if ((*ptr & 0xfffe0000) != 0xfffe0000)
1278         {
1279             FIXME("Invalid shader signature %#x.\n", *ptr);
1280             ret = D3DXERR_INVALIDDATA;
1281             goto err_out;
1282         }
1283         TRACE("Shader version %#x.\n", *ptr & 0xffff);
1284         shader_ptr = ptr;
1285         ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
1286         if (!ptr)
1287             TRACE("No preshader found.\n");
1288     }
1289     else
1290     {
1291         pres_size = count;
1292     }
1293 
1294     if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect)))
1295     {
1296         FIXME("Failed parsing preshader, byte code for analysis follows.\n");
1297         dump_bytecode(byte_code, byte_code_size);
1298         goto err_out;
1299     }
1300 
1301     if (shader)
1302     {
1303         if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect,
1304                 skip_constants, skip_constants_count, &peval->pres)))
1305         {
1306             TRACE("Could not get shader constant table, hr %#x.\n", ret);
1307             goto err_out;
1308         }
1309         update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
1310     }
1311 
1312     for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
1313     {
1314         if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i)))
1315             goto err_out;
1316     }
1317 
1318     if (TRACE_ON(d3dx))
1319     {
1320         dump_bytecode(byte_code, byte_code_size);
1321         dump_preshader(&peval->pres);
1322         if (shader)
1323         {
1324             TRACE("// Shader registers:\n");
1325             dump_registers(&peval->shader_inputs);
1326         }
1327     }
1328     *peval_out = peval;
1329     TRACE("Created parameter evaluator %p.\n", *peval_out);
1330     return D3D_OK;
1331 
1332 err_out:
1333     WARN("Error creating parameter evaluator.\n");
1334     if (TRACE_ON(d3dx))
1335         dump_bytecode(byte_code, byte_code_size);
1336 
1337     d3dx_free_param_eval(peval);
1338     *peval_out = NULL;
1339     return ret;
1340 }
1341 
1342 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
1343 {
1344     HeapFree(GetProcessHeap(), 0, ctab->inputs);
1345     HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
1346     HeapFree(GetProcessHeap(), 0, ctab->const_set);
1347 }
1348 
1349 static void d3dx_free_preshader(struct d3dx_preshader *pres)
1350 {
1351     HeapFree(GetProcessHeap(), 0, pres->ins);
1352 
1353     regstore_free_tables(&pres->regs);
1354     d3dx_free_const_tab(&pres->inputs);
1355 }
1356 
1357 void d3dx_free_param_eval(struct d3dx_param_eval *peval)
1358 {
1359     TRACE("peval %p.\n", peval);
1360 
1361     if (!peval)
1362         return;
1363 
1364     d3dx_free_preshader(&peval->pres);
1365     d3dx_free_const_tab(&peval->shader_inputs);
1366     HeapFree(GetProcessHeap(), 0, peval);
1367 }
1368 
/* Converts count float values at in to int values at out using the C
 * float to int conversion. */
static void pres_int_from_float(void *out, const void *in, unsigned int count)
{
    const float *src = in;
    int *dst = out;
    unsigned int idx = 0;

    while (idx < count)
    {
        dst[idx] = src[idx];
        ++idx;
    }
}
1378 
1379 static void pres_bool_from_value(void *out, const void *in, unsigned int count)
1380 {
1381     unsigned int i;
1382     const DWORD *in_dword = in;
1383     BOOL *out_bool = out;
1384 
1385     for (i = 0; i < count; ++i)
1386         out_bool[i] = !!in_dword[i];
1387 }
1388 
/* Converts count int values at in to float values at out. */
static void pres_float_from_int(void *out, const void *in, unsigned int count)
{
    const int *src = in;
    float *dst = out;
    unsigned int idx;

    for (idx = 0; idx < count; idx++)
    {
        const int value = src[idx];

        dst[idx] = value;
    }
}
1398 
1399 static void pres_float_from_bool(void *out, const void *in, unsigned int count)
1400 {
1401     unsigned int i;
1402     const BOOL *in_bool = in;
1403     float *out_float = out;
1404 
1405     for (i = 0; i < count; ++i)
1406         out_float[i] = !!in_bool[i];
1407 }
1408 
/* Converts count booleans at in to int values at out: 1 for any non-zero value,
 * 0 otherwise.
 *
 * The input is read as 32 bit integers (BOOL is a typedef of int). Reading it as
 * float, as was done before, treats the bit pattern 0x80000000 as -0.0 and thus
 * converts a TRUE value to 0. */
static void pres_int_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const int *in_bool = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = !!in_bool[i];
}
1418 
1419 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table,
1420         unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type)
1421 {
1422     typedef void (*conv_func)(void *out, const void *in, unsigned int count);
1423     static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] =
1424     {
1425         {NULL,                 NULL, pres_int_from_float, pres_bool_from_value},
1426         {NULL,                 NULL, NULL,                NULL},
1427         {pres_float_from_int,  NULL, NULL,                pres_bool_from_value},
1428         {pres_float_from_bool, NULL, pres_int_from_bool,  NULL}
1429     };
1430     enum pres_value_type table_type = table_info[table].type;
1431 
1432     if (param_type == table_type)
1433     {
1434         regstore_set_values(rs, table, in, offset, count);
1435         return;
1436     }
1437 
1438     set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count);
1439 }
1440 
1441 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1442         D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr,
1443         unsigned int start, unsigned int count)
1444 {
1445     if (type == D3DXPT_VERTEXSHADER)
1446     {
1447         switch(table)
1448         {
1449             case PRES_REGTAB_OCONST:
1450                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count);
1451             case PRES_REGTAB_OICONST:
1452                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count);
1453             case PRES_REGTAB_OBCONST:
1454                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count);
1455             default:
1456                 FIXME("Unexpected register table %u.\n", table);
1457                 return D3DERR_INVALIDCALL;
1458         }
1459     }
1460     else if (type == D3DXPT_PIXELSHADER)
1461     {
1462         switch(table)
1463         {
1464             case PRES_REGTAB_OCONST:
1465                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count);
1466             case PRES_REGTAB_OICONST:
1467                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count);
1468             case PRES_REGTAB_OBCONST:
1469                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count);
1470             default:
1471                 FIXME("Unexpected register table %u.\n", table);
1472                 return D3DERR_INVALIDCALL;
1473         }
1474     }
1475     else
1476     {
1477         FIXME("Unexpected parameter type %u.\n", type);
1478         return D3DERR_INVALIDCALL;
1479     }
1480 }
1481 
1482 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
1483         ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1484         D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty)
1485 {
1486     unsigned int const_idx;
1487     unsigned int current_start = 0, current_count = 0;
1488     enum pres_reg_tables current_table = PRES_REGTAB_COUNT;
1489     BOOL update_device = manager || device;
1490     HRESULT hr, result = D3D_OK;
1491     ULONG64 update_version = const_tab->update_version;
1492 
1493     for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1494     {
1495         struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1496         enum pres_reg_tables table = const_set->table;
1497         struct d3dx_parameter *param = const_set->param;
1498         unsigned int element, i, j, start_offset;
1499         struct const_upload_info info;
1500         unsigned int *data;
1501         enum pres_value_type param_type;
1502 
1503         if (!(param && is_param_dirty(param, update_version)))
1504             continue;
1505 
1506         data = param->data;
1507         start_offset = get_offset_reg(table, const_set->register_index);
1508         if (const_set->direct_copy)
1509         {
1510             regstore_set_values(rs, table, data, start_offset,
1511                     get_offset_reg(table, const_set->register_count));
1512             continue;
1513         }
1514         param_type = table_type_from_param_type(param->type);
1515         if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR)
1516         {
1517             unsigned int count = max(param->rows, param->columns);
1518 
1519             if (count >= get_reg_components(table))
1520             {
1521                 regstore_set_data(rs, table, start_offset, data,
1522                         count * const_set->element_count, param_type);
1523             }
1524             else
1525             {
1526                 for (element = 0; element < const_set->element_count; ++element)
1527                     regstore_set_data(rs, table, start_offset + get_offset_reg(table, element),
1528                             &data[element * count], count, param_type);
1529             }
1530             continue;
1531         }
1532         get_const_upload_info(const_set, &info);
1533         for (element = 0; element < const_set->element_count; ++element)
1534         {
1535             unsigned int *out = (unsigned int *)rs->tables[table] + start_offset;
1536 
1537             /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer.
1538              * All the supported types of parameters and table values have the same size. */
1539             if (info.transpose)
1540             {
1541                 for (i = 0; i < info.major_count; ++i)
1542                     for (j = 0; j < info.minor; ++j)
1543                         out[i * info.major_stride + j] = data[i + j * info.major];
1544 
1545                 for (j = 0; j < info.minor_remainder; ++j)
1546                     out[i * info.major_stride + j] = data[i + j * info.major];
1547             }
1548             else
1549             {
1550                 for (i = 0; i < info.major_count; ++i)
1551                     for (j = 0; j < info.minor; ++j)
1552                         out[i * info.major_stride + j] = data[i * info.minor + j];
1553             }
1554             start_offset += get_offset_reg(table, const_set->register_count);
1555             data += param->rows * param->columns;
1556         }
1557         start_offset = get_offset_reg(table, const_set->register_index);
1558         if (table_info[table].type != param_type)
1559             regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset,
1560                     get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type);
1561     }
1562     const_tab->update_version = new_update_version;
1563     if (!update_device)
1564         return D3D_OK;
1565 
1566     for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1567     {
1568         struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1569 
1570         if (device_update_all || (const_set->param
1571                 ? is_param_dirty(const_set->param, update_version) : pres_dirty))
1572         {
1573             enum pres_reg_tables table = const_set->table;
1574 
1575             if (table == current_table && current_start + current_count == const_set->register_index)
1576             {
1577                 current_count += const_set->register_count * const_set->element_count;
1578             }
1579             else
1580             {
1581                 if (current_count)
1582                 {
1583                     if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1584                             (DWORD *)rs->tables[current_table]
1585                             + get_offset_reg(current_table, current_start), current_start, current_count)))
1586                         result = hr;
1587                 }
1588                 current_table = table;
1589                 current_start = const_set->register_index;
1590                 current_count = const_set->register_count * const_set->element_count;
1591             }
1592         }
1593     }
1594     if (current_count)
1595     {
1596         if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1597                 (DWORD *)rs->tables[current_table]
1598                 + get_offset_reg(current_table, current_start), current_start, current_count)))
1599             result = hr;
1600     }
1601     return result;
1602 }
1603 
1604 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset)
1605 {
1606     return regstore_get_double(rs, table, offset);
1607 }
1608 
1609 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp)
1610 {
1611     unsigned int offset, base_index, reg_index, table;
1612 
1613     table = opr->reg.table;
1614 
1615     if (opr->index_reg.table == PRES_REGTAB_COUNT)
1616         base_index = 0;
1617     else
1618         base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset));
1619 
1620     offset = get_offset_reg(table, base_index) + opr->reg.offset + comp;
1621     reg_index = get_reg_offset(table, offset);
1622 
1623     if (reg_index >= rs->table_sizes[table])
1624     {
1625         unsigned int wrap_size;
1626 
1627         if (table == PRES_REGTAB_CONST)
1628         {
1629             /* As it can be guessed from tests, offset into floating constant table is wrapped
1630              * to the nearest power of 2 and not to the actual table size. */
1631             for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1)
1632                 ;
1633         }
1634         else
1635         {
1636             wrap_size = rs->table_sizes[table];
1637         }
1638         WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n",
1639                 reg_index, table, wrap_size, rs->table_sizes[table]);
1640         reg_index %= wrap_size;
1641 
1642         if (reg_index >= rs->table_sizes[table])
1643             return 0.0;
1644 
1645         offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table);
1646     }
1647 
1648     return exec_get_reg_value(rs, table, offset);
1649 }
1650 
1651 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg,
1652         unsigned int comp, double res)
1653 {
1654     regstore_set_double(rs, reg->table, reg->offset + comp, res);
1655 }
1656 
1657 #define ARGS_ARRAY_SIZE 8
1658 static HRESULT execute_preshader(struct d3dx_preshader *pres)
1659 {
1660     unsigned int i, j, k;
1661     double args[ARGS_ARRAY_SIZE];
1662     double res;
1663 
1664     for (i = 0; i < pres->ins_count; ++i)
1665     {
1666         const struct d3dx_pres_ins *ins;
1667         const struct op_info *oi;
1668 
1669         ins = &pres->ins[i];
1670         oi = &pres_op_info[ins->op];
1671         if (oi->func_all_comps)
1672         {
1673             if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
1674             {
1675                 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
1676                 return E_FAIL;
1677             }
1678             for (k = 0; k < oi->input_count; ++k)
1679                 for (j = 0; j < ins->component_count; ++j)
1680                     args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k],
1681                             ins->scalar_op && !k ? 0 : j);
1682             res = oi->func(args, ins->component_count);
1683 
1684             /* only 'dot' instruction currently falls here */
1685             exec_set_arg(&pres->regs, &ins->output.reg, 0, res);
1686         }
1687         else
1688         {
1689             for (j = 0; j < ins->component_count; ++j)
1690             {
1691                 for (k = 0; k < oi->input_count; ++k)
1692                     args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j);
1693                 res = oi->func(args, ins->component_count);
1694                 exec_set_arg(&pres->regs, &ins->output.reg, j, res);
1695             }
1696         }
1697     }
1698     return D3D_OK;
1699 }
1700 
1701 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version)
1702 {
1703     unsigned int i;
1704 
1705     if (update_version == ULONG64_MAX)
1706         update_version = ctab->update_version;
1707     for (i = 0; i < ctab->input_count; ++i)
1708     {
1709         if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]),
1710                 update_version))
1711             return TRUE;
1712     }
1713     return FALSE;
1714 }
1715 
1716 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version)
1717 {
1718     return is_const_tab_input_dirty(&peval->pres.inputs, update_version)
1719             || is_const_tab_input_dirty(&peval->shader_inputs, update_version);
1720 }
1721 
1722 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param,
1723         void *param_value)
1724 {
1725     HRESULT hr;
1726     unsigned int i;
1727     unsigned int elements, elements_param, elements_table;
1728     float *oc;
1729 
1730     TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
1731 
1732     if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX))
1733     {
1734         set_constants(&peval->pres.regs, &peval->pres.inputs,
1735                 next_update_version(peval->version_counter),
1736                 NULL, NULL, peval->param_type, FALSE, FALSE);
1737 
1738         if (FAILED(hr = execute_preshader(&peval->pres)))
1739             return hr;
1740     }
1741 
1742     elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]);
1743     elements_param = param->bytes / sizeof(unsigned int);
1744     elements = min(elements_table, elements_param);
1745     oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
1746     for (i = 0; i < elements; ++i)
1747         set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
1748     return D3D_OK;
1749 }
1750 
1751 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1752         struct d3dx_param_eval *peval, BOOL update_all)
1753 {
1754     HRESULT hr;
1755     struct d3dx_preshader *pres = &peval->pres;
1756     struct d3dx_regstore *rs = &pres->regs;
1757     ULONG64 new_update_version = next_update_version(peval->version_counter);
1758     BOOL pres_dirty = FALSE;
1759 
1760     TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
1761 
1762     if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX))
1763     {
1764         set_constants(rs, &pres->inputs, new_update_version,
1765                 NULL, NULL, peval->param_type, FALSE, FALSE);
1766         if (FAILED(hr = execute_preshader(pres)))
1767             return hr;
1768         pres_dirty = TRUE;
1769     }
1770 
1771     return set_constants(rs, &peval->shader_inputs, new_update_version,
1772             manager, device, peval->param_type, update_all, pres_dirty);
1773 }
1774