xref: /reactos/dll/directx/wine/d3dx9_36/preshader.c (revision 1de09c47)
1 #ifdef __REACTOS__
2 #include "precomp.h"
3 #else
4 /*
5  * Copyright 2016 Paul Gofman
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
20  */
21 
22 
23 #include "d3dx9_private.h"
24 
25 #include <float.h>
26 #include <math.h>
27 #include <assert.h>
28 #endif /* __REACTOS__ */
29 
30 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
31 
32 #ifdef __REACTOS__
33 /* ReactOS FIXME: Inspect */
34 #define fmin min
35 #define fmax max
36 #endif
37 
/* Preshader operations. Each enumerator labels the entry at the same position
 * in pres_op_info[], so the declaration order here has to match that table. */
enum pres_ops
{
    /* No inputs. */
    PRESHADER_OP_NOP,

    /* One input. */
    PRESHADER_OP_MOV,
    PRESHADER_OP_NEG,
    PRESHADER_OP_RCP,
    PRESHADER_OP_FRC,
    PRESHADER_OP_EXP,
    PRESHADER_OP_LOG,
    PRESHADER_OP_RSQ,
    PRESHADER_OP_SIN,
    PRESHADER_OP_COS,
    PRESHADER_OP_ASIN,
    PRESHADER_OP_ACOS,
    PRESHADER_OP_ATAN,

    /* Two inputs. */
    PRESHADER_OP_MIN,
    PRESHADER_OP_MAX,
    PRESHADER_OP_LT,
    PRESHADER_OP_GE,
    PRESHADER_OP_ADD,
    PRESHADER_OP_MUL,
    PRESHADER_OP_ATAN2,
    PRESHADER_OP_DIV,

    /* Three inputs. */
    PRESHADER_OP_CMP,

    /* Dot products. */
    PRESHADER_OP_DOT,
    PRESHADER_OP_DOTSWIZ6,
    PRESHADER_OP_DOTSWIZ8,
};
66 
/* Signature shared by the pres_*() operation handlers: 'args' holds the input
 * values; 'n' is used by pres_dot() as the number of components per operand
 * and is ignored by the other handlers in this file. */
typedef double (*pres_op_func)(double *args, int n);
68 
69 static double to_signed_nan(double v)
70 {
71     static const union
72     {
73         ULONG64 ulong64_value;
74         double double_value;
75     }
76     signed_nan =
77     {
78         0xfff8000000000000
79     };
80 
81     return isnan(v) ? signed_nan.double_value : v;
82 }
83 
/* mov: passes the first argument through. */
static double pres_mov(double *args, int n)
{
    return *args;
}

/* add: sum of the first two arguments. */
static double pres_add(double *args, int n)
{
    const double lhs = args[0];
    const double rhs = args[1];

    return lhs + rhs;
}

/* mul: product of the first two arguments. */
static double pres_mul(double *args, int n)
{
    const double lhs = args[0];
    const double rhs = args[1];

    return lhs * rhs;
}
/* dot: args[0..n-1] is the first vector and args[n..2n-1] the second;
 * returns the sum of the pairwise products (0.0 when n <= 0). */
static double pres_dot(double *args, int n)
{
    double acc = 0.0;
    int k = 0;

    while (k < n)
    {
        acc += args[k] * args[n + k];
        ++k;
    }
    return acc;
}
97 
/* d3ds_dotswiz with six inputs: dot product of two 3-component vectors.
 * The passed-in n is ignored. */
static double pres_dotswiz6(double *args, int n)
{
    const int components = 3;

    return pres_dot(args, components);
}

/* d3ds_dotswiz with eight inputs: dot product of two 4-component vectors.
 * The passed-in n is ignored. */
static double pres_dotswiz8(double *args, int n)
{
    const int components = 4;

    return pres_dot(args, components);
}
107 
108 static double pres_neg(double *args, int n) {return -args[0];}
109 static double pres_rcp(double *args, int n) {return 1.0 / args[0];}
110 static double pres_lt(double *args, int n)  {return args[0] < args[1] ? 1.0 : 0.0;}
111 static double pres_ge(double *args, int n)  {return args[0] >= args[1] ? 1.0 : 0.0;}
112 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
113 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
114 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
115 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];}
116 static double pres_sin(double *args, int n) {return sin(args[0]);}
117 static double pres_cos(double *args, int n) {return cos(args[0]);}
118 static double pres_rsq(double *args, int n)
119 {
120     double v;
121 
122     v = fabs(args[0]);
123     if (v == 0.0)
124         return INFINITY;
125     else
126         return 1.0 / sqrt(v);
127 }
128 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
129 static double pres_log(double *args, int n)
130 {
131     double v;
132 
133     v = fabs(args[0]);
134     if (v == 0.0)
135         return 0.0;
136     else
137         return log2(v);
138 }
/* asin: arc sine of the argument, with any NaN result passed through
 * to_signed_nan(). */
static double pres_asin(double *args, int n)
{
    const double result = asin(args[0]);

    return to_signed_nan(result);
}

/* acos: arc cosine of the argument, with any NaN result passed through
 * to_signed_nan(). */
static double pres_acos(double *args, int n)
{
    const double result = acos(args[0]);

    return to_signed_nan(result);
}
141 static double pres_atan(double *args, int n) {return atan(args[0]);}
142 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);}
143 
/* div: deliberately returns 0.0 for any input. Test results show that the
 * 'div' operation always yields 0; the compiler does not seem to ever emit it
 * (it uses rcp + mul instead), so native d3dx probably does not implement it. */
static double pres_div(double *args, int n)
{
    return 0.0;
}
147 
/* Fields of an instruction token as decoded by parse_pres_ins(). */
#define PRES_OPCODE_MASK 0x7ff00000   /* opcode, bits 20..30 */
#define PRES_OPCODE_SHIFT 20          /* right shift that aligns the opcode */
#define PRES_SCALAR_FLAG 0x80000000   /* bit 31: scalar operation */
#define PRES_NCOMP_MASK  0x0000ffff   /* component count, low 16 bits */

/* Four-character tags; the ASCII bytes are stored lowest byte first. */
#define FOURCC_PRES 0x53455250        /* 'P','R','E','S' */
#define FOURCC_CLIT 0x54494c43        /* 'C','L','I','T' */
#define FOURCC_FXLC 0x434c5846        /* 'F','X','L','C' */
#define FOURCC_PRSI 0x49535250        /* 'P','R','S','I' */
#define PRES_SIGN 0x46580000          /* 0x46 'F', 0x58 'X' in the upper 16 bits */
158 
159 struct op_info
160 {
161     unsigned int opcode;
162     char mnem[16];
163     unsigned int input_count;
164     BOOL func_all_comps;
165     pres_op_func func;
166 };
167 
168 static const struct op_info pres_op_info[] =
169 {
170     {0x000, "nop", 0, 0, NULL    }, /* PRESHADER_OP_NOP */
171     {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
172     {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
173     {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
174     {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
175     {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
176     {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */
177     {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
178     {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
179     {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
180     {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */
181     {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */
182     {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */
183     {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
184     {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
185     {0x202, "lt",  2, 0, pres_lt }, /* PRESHADER_OP_LT  */
186     {0x203, "ge",  2, 0, pres_ge }, /* PRESHADER_OP_GE  */
187     {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
188     {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
189     {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */
190     {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */
191     {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
192     {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
193     {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
194     {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
195 };
196 
/* Storage type of the components held in a register table. */
enum pres_value_type
{
    PRES_VT_FLOAT,   /* float components */
    PRES_VT_DOUBLE,  /* double components */
    PRES_VT_INT,     /* int components */
    PRES_VT_BOOL,    /* BOOL components */
    PRES_VT_COUNT    /* number of types; also returned for unsupported types */
};
205 
206 static const struct
207 {
208     unsigned int component_size;
209     enum pres_value_type type;
210 }
211 table_info[] =
212 {
213     {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
214     {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
215     {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
216     {sizeof(BOOL),   PRES_VT_BOOL  }, /* PRES_REGTAB_OBCONST */
217     {sizeof(int),    PRES_VT_INT,  }, /* PRES_REGTAB_OICONST */
218     /* TODO: use double precision for 64 bit */
219     {sizeof(float),  PRES_VT_FLOAT }  /* PRES_REGTAB_TEMP */
220 };
221 
/* Short textual names, one per register table in table_info[] order, followed
 * by a trailing "(null)" entry. */
static const char *table_symbol[] =
{
    "imm",
    "c",
    "oc",
    "ob",
    "oi",
    "r",
    "(null)",
};
226 
227 static const enum pres_reg_tables pres_regset2table[] =
228 {
229     PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
230     PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
231     PRES_REGTAB_CONST,    /* D3DXRS_FLOAT4 */
232     PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
233 };
234 
235 static const enum pres_reg_tables shad_regset2table[] =
236 {
237     PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
238     PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
239     PRES_REGTAB_OCONST,   /* D3DXRS_FLOAT4 */
240     PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
241 };
242 
243 struct d3dx_pres_reg
244 {
245     enum pres_reg_tables table;
246     /* offset is component index, not register index, e. g.
247        offset for component c3.y is 13 (3 * 4 + 1) */
248     unsigned int offset;
249 };
250 
251 struct d3dx_pres_operand
252 {
253     struct d3dx_pres_reg reg;
254     struct d3dx_pres_reg index_reg;
255 };
256 
257 #define MAX_INPUTS_COUNT 8
258 
259 struct d3dx_pres_ins
260 {
261     enum pres_ops op;
262     /* first input argument is scalar,
263        scalar component is propagated */
264     BOOL scalar_op;
265     unsigned int component_count;
266     struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
267     struct d3dx_pres_operand output;
268 };
269 
270 struct const_upload_info
271 {
272     BOOL transpose;
273     unsigned int major, minor;
274     unsigned int major_stride;
275     unsigned int major_count;
276     unsigned int count;
277     unsigned int minor_remainder;
278 };
279 
280 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type)
281 {
282     switch (type)
283     {
284         case D3DXPT_FLOAT:
285             return PRES_VT_FLOAT;
286         case D3DXPT_INT:
287             return PRES_VT_INT;
288         case D3DXPT_BOOL:
289             return PRES_VT_BOOL;
290         default:
291             FIXME("Unsupported type %u.\n", type);
292             return PRES_VT_COUNT;
293     }
294 }
295 
296 static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
297 {
298     return table == PRES_REGTAB_OBCONST ? offset : offset >> 2;
299 }
300 
301 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx)
302 {
303     return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2;
304 }
305 
306 static unsigned int get_reg_components(unsigned int table)
307 {
308     return get_offset_reg(table, 1);
309 }
310 
/* Eight times sizeof(unsigned int), i.e. the bit width of an unsigned int
 * when bytes are 8 bits wide. */
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
312 
313 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
314 {
315     unsigned int size;
316 
317     size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size;
318     if (size)
319     {
320         rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
321         if (!rs->tables[table])
322             return E_OUTOFMEMORY;
323     }
324     return D3D_OK;
325 }
326 
327 static void regstore_free_tables(struct d3dx_regstore *rs)
328 {
329     unsigned int i;
330 
331     for (i = 0; i < PRES_REGTAB_COUNT; ++i)
332     {
333         HeapFree(GetProcessHeap(), 0, rs->tables[i]);
334     }
335 }
336 
337 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data,
338         unsigned int start_offset, unsigned int count)
339 {
340     BYTE *dst = rs->tables[table];
341     const BYTE *src = data;
342     unsigned int size;
343 
344     dst += start_offset * table_info[table].component_size;
345     size = count * table_info[table].component_size;
346     assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst));
347     memcpy(dst, src, size);
348 }
349 
350 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
351 {
352     BYTE *p;
353 
354     p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
355     switch (table_info[table].type)
356     {
357         case PRES_VT_FLOAT:
358             return *(float *)p;
359         case PRES_VT_DOUBLE:
360             return *(double *)p;
361         default:
362             FIXME("Unexpected preshader input from table %u.\n", table);
363             return NAN;
364     }
365 }
366 
367 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
368 {
369     BYTE *p;
370 
371     p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
372     switch (table_info[table].type)
373     {
374         case PRES_VT_FLOAT : *(float *)p = v; break;
375         case PRES_VT_DOUBLE: *(double *)p = v; break;
376         case PRES_VT_INT   : *(int *)p = lrint(v); break;
377         case PRES_VT_BOOL  : *(BOOL *)p = !!v; break;
378         default:
379             FIXME("Bad type %u.\n", table_info[table].type);
380             break;
381     }
382 }
383 
/* Traces a byte code buffer as hexadecimal DWORDs, at most eight per line.
 * 'size' is in bytes; a trailing partial DWORD is not printed. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *words = (unsigned int *)data;
    unsigned int word_count = size / sizeof(*words);
    unsigned int pos, k, chunk;

    for (pos = 0; pos < word_count; pos += chunk)
    {
        chunk = min(word_count - pos, 8);
        for (k = 0; k < chunk; ++k)
            TRACE("0x%08x,", words[pos + k]);
        TRACE("\n");
    }
}
400 
/* Walks the leading run of comment tokens in a byte code buffer looking for
 * the section tagged with 'fourcc'.
 *
 * A comment token has 0xfffe in its low 16 bits and the section length (in
 * DWORDs following the token) in its high 16 bits; the first DWORD of the
 * section is its tag.
 *
 * ptr    - start of the buffer to scan.
 * count  - number of DWORDs available at ptr.
 * fourcc - tag to look for.
 * size   - receives the section length on success; untouched otherwise.
 *
 * Returns a pointer to the DWORD following the tag, or NULL when the run of
 * comments ends, a section is empty or truncated, or fewer than three DWORDs
 * remain (so that at least one value after the tag is available on a
 * non-NULL return). */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    for (;;)
    {
        unsigned int token, section_len, step;

        if (count <= 2)
            return NULL;

        token = ptr[0];
        if ((token & 0xffff) != 0xfffe)
            return NULL;

        section_len = token >> 16;
        step = section_len + 1;
        if (!section_len || step > count)
            return NULL;

        if (ptr[1] == fourcc)
        {
            *size = section_len;
            return ptr + 2;
        }

        count -= step;
        ptr += step;
    }
}
422 
423 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg)
424 {
425     static const enum pres_reg_tables reg_table[8] =
426     {
427         PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
428         PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
429     };
430 
431     if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
432     {
433         FIXME("Unsupported register table %#x.\n", *ptr);
434         return NULL;
435     }
436 
437     reg->table = reg_table[*ptr++];
438     reg->offset = *ptr++;
439     return ptr;
440 }
441 
442 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
443 {
444     if (count < 3 || (*ptr && count < 5))
445     {
446         WARN("Byte code buffer ends unexpectedly, count %u.\n", count);
447         return NULL;
448     }
449 
450     if (*ptr)
451     {
452         if (*ptr != 1)
453         {
454             FIXME("Unknown relative addressing flag, word %#x.\n", *ptr);
455             return NULL;
456         }
457         ptr = parse_pres_reg(ptr + 1, &opr->index_reg);
458         if (!ptr)
459             return NULL;
460     }
461     else
462     {
463         opr->index_reg.table = PRES_REGTAB_COUNT;
464         ++ptr;
465     }
466 
467     ptr = parse_pres_reg(ptr, &opr->reg);
468 
469     if (opr->reg.table == PRES_REGTAB_OBCONST)
470         opr->reg.offset /= 4;
471     return ptr;
472 }
473 
474 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
475 {
476     unsigned int ins_code, ins_raw;
477     unsigned int input_count;
478     unsigned int i;
479 
480     if (count < 2)
481     {
482         WARN("Byte code buffer ends unexpectedly.\n");
483         return NULL;
484     }
485 
486     ins_raw = *ptr++;
487     ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
488     ins->component_count = ins_raw & PRES_NCOMP_MASK;
489     ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);
490 
491     if (ins->component_count < 1 || ins->component_count > 4)
492     {
493         FIXME("Unsupported number of components %u.\n", ins->component_count);
494         return NULL;
495     }
496     input_count = *ptr++;
497     count -= 2;
498     for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
499         if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
500             break;
501     if (i == ARRAY_SIZE(pres_op_info))
502     {
503         FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
504         return NULL;
505     }
506     ins->op = i;
507     if (input_count > ARRAY_SIZE(ins->inputs))
508     {
509         FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
510                 pres_op_info[i].mnem);
511         return NULL;
512     }
513     for (i = 0; i < input_count; ++i)
514     {
515         unsigned int *p;
516 
517         p = parse_pres_arg(ptr, count, &ins->inputs[i]);
518         if (!p)
519             return NULL;
520         count -= p - ptr;
521         ptr = p;
522     }
523     ptr = parse_pres_arg(ptr, count, &ins->output);
524     if (ins->output.index_reg.table != PRES_REGTAB_COUNT)
525     {
526         FIXME("Relative addressing in output register not supported.\n");
527         return NULL;
528     }
529     if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset
530             + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1))
531             != get_reg_offset(ins->output.reg.table, ins->output.reg.offset))
532     {
533         FIXME("Instructions outputting multiple registers are not supported.\n");
534         return NULL;
535     }
536     return ptr;
537 }
538 
539 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc,
540         WORD *constantinfo_reserved)
541 {
542     const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc);
543 
544     if (!constant)
545     {
546         FIXME("Could not get constant desc.\n");
547         if (constantinfo_reserved)
548             *constantinfo_reserved = 0;
549         return D3DERR_INVALIDCALL;
550     }
551     *desc = constant->desc;
552     if (constantinfo_reserved)
553         *constantinfo_reserved = constant->constantinfo_reserved;
554     return D3D_OK;
555 }
556 
557 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set,
558         struct const_upload_info *info)
559 {
560     struct d3dx_parameter *param = const_set->param;
561     unsigned int table = const_set->table;
562 
563     info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
564             || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
565     if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
566     {
567         info->major = param->columns;
568         info->minor = param->rows;
569     }
570     else
571     {
572         info->major = param->rows;
573         info->minor = param->columns;
574     }
575 
576     if (get_reg_components(table) == 1)
577     {
578         unsigned int const_length = get_offset_reg(table, const_set->register_count);
579 
580         info->major_stride = info->minor;
581         info->major_count = const_length / info->major_stride;
582         info->minor_remainder = const_length % info->major_stride;
583     }
584     else
585     {
586         info->major_stride = get_reg_components(table);
587         info->major_count = const_set->register_count;
588         info->minor_remainder = 0;
589     }
590     info->count = info->major_count * info->minor + info->minor_remainder;
591 }
592 
593 #define INITIAL_CONST_SET_SIZE 16
594 
595 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
596 {
597     if (const_tab->const_set_count >= const_tab->const_set_size)
598     {
599         unsigned int new_size;
600         struct d3dx_const_param_eval_output *new_alloc;
601 
602         if (!const_tab->const_set_size)
603         {
604             new_size = INITIAL_CONST_SET_SIZE;
605             new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size);
606             if (!new_alloc)
607             {
608                 ERR("Out of memory.\n");
609                 return E_OUTOFMEMORY;
610             }
611         }
612         else
613         {
614             new_size = const_tab->const_set_size * 2;
615             new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
616                     sizeof(*const_tab->const_set) * new_size);
617             if (!new_alloc)
618             {
619                 ERR("Out of memory.\n");
620                 return E_OUTOFMEMORY;
621             }
622         }
623         const_tab->const_set = new_alloc;
624         const_tab->const_set_size = new_size;
625     }
626     const_tab->const_set[const_tab->const_set_count++] = *set;
627     return D3D_OK;
628 }
629 
630 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab,
631         struct d3dx_preshader *pres)
632 {
633     unsigned int i;
634     struct d3dx_const_param_eval_output const_set = {NULL};
635 
636     for (i = 0; i < pres->ins_count; ++i)
637     {
638         const struct d3dx_pres_ins *ins = &pres->ins[i];
639         const struct d3dx_pres_reg *reg = &ins->output.reg;
640 
641         if (reg->table == PRES_REGTAB_TEMP)
642             continue;
643 
644         const_set.register_index = get_reg_offset(reg->table, reg->offset);
645         const_set.register_count = 1;
646         const_set.table = reg->table;
647         const_set.constant_class = D3DXPC_FORCE_DWORD;
648         const_set.element_count = 1;
649         append_const_set(const_tab, &const_set);
650     }
651 }
652 
653 static int __cdecl compare_const_set(const void *a, const void *b)
654 {
655     const struct d3dx_const_param_eval_output *r1 = a;
656     const struct d3dx_const_param_eval_output *r2 = b;
657 
658     if (r1->table != r2->table)
659         return r1->table - r2->table;
660     return r1->register_index - r2->register_index;
661 }
662 
663 static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab,
664         struct d3dx_parameter *param, unsigned int index)
665 {
666     unsigned int i, start_index = index;
667     DWORD *current_data;
668     enum pres_reg_tables current_table;
669     unsigned int current_start_offset, element_count;
670     struct d3dx_const_param_eval_output *first_const;
671 
672     if (!const_tab->const_set_count)
673         return D3D_OK;
674 
675     while (index < const_tab->const_set_count - 1)
676     {
677         first_const = &const_tab->const_set[index];
678         current_data = first_const->param->data;
679         current_table = first_const->table;
680         current_start_offset = get_offset_reg(current_table, first_const->register_index);
681         element_count = 0;
682         for (i = index; i < const_tab->const_set_count; ++i)
683         {
684             struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i];
685             unsigned int count = get_offset_reg(const_set->table,
686                     const_set->register_count * const_set->element_count);
687             unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index);
688 
689             if (!(const_set->table == current_table && current_start_offset == start_offset
690                     && const_set->direct_copy == first_const->direct_copy
691                     && current_data == const_set->param->data
692                     && (const_set->direct_copy || (first_const->param->type == const_set->param->type
693                     && first_const->param->class == const_set->param->class
694                     && first_const->param->columns == const_set->param->columns
695                     && first_const->param->rows == const_set->param->rows
696                     && first_const->register_count == const_set->register_count
697                     && (i == const_tab->const_set_count - 1
698                     || first_const->param->element_count == const_set->param->element_count)))))
699                 break;
700 
701             current_start_offset += count;
702             current_data += const_set->direct_copy ? count : const_set->param->rows
703                     * const_set->param->columns * const_set->element_count;
704             element_count += const_set->element_count;
705         }
706 
707         if (i > index + 1)
708         {
709             TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index,
710                     debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy);
711 
712             first_const->element_count = element_count;
713             if (first_const->direct_copy)
714             {
715                 first_const->element_count = 1;
716                 if (index == start_index
717                         && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT))
718                 {
719                     if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
720                         return D3DERR_INVALIDCALL;
721                     first_const->param = param;
722                 }
723                 first_const->register_count = get_reg_offset(current_table, current_start_offset)
724                         - first_const->register_index;
725             }
726             memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i],
727                     sizeof(*const_tab->const_set) * (const_tab->const_set_count - i));
728             const_tab->const_set_count -= i - index - 1;
729         }
730         else
731         {
732             TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n",
733                     const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy);
734         }
735         index = i;
736     }
737     return D3D_OK;
738 }
739 
740 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
741         D3DXHANDLE hc, struct d3dx_parameter *param)
742 {
743     D3DXCONSTANT_DESC desc;
744     unsigned int const_count, param_count, i;
745     BOOL get_element;
746     struct d3dx_const_param_eval_output const_set;
747     struct const_upload_info info;
748     enum pres_value_type table_type;
749     HRESULT hr;
750 
751     if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL)))
752         return D3DERR_INVALIDCALL;
753 
754     if (param->element_count)
755     {
756         param_count = param->element_count;
757         const_count = desc.Elements;
758         get_element = TRUE;
759     }
760     else
761     {
762         if (desc.Elements > 1)
763         {
764             FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
765             return D3DERR_INVALIDCALL;
766         }
767         param_count = param->member_count;
768         const_count = desc.StructMembers;
769         get_element = FALSE;
770     }
771     if (const_count != param_count)
772     {
773         FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
774                 param_count, const_count);
775         return D3DERR_INVALIDCALL;
776     }
777     if (const_count)
778     {
779         HRESULT ret = D3D_OK;
780         D3DXHANDLE hc_element;
781         unsigned int index = const_tab->const_set_count;
782 
783         for (i = 0; i < const_count; ++i)
784         {
785             if (get_element)
786                 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
787             else
788                 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
789             if (!hc_element)
790             {
791                 FIXME("Could not get constant.\n");
792                 hr = D3DERR_INVALIDCALL;
793             }
794             else
795             {
796                 hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
797             }
798             if (FAILED(hr))
799                 ret = hr;
800         }
801         if (FAILED(ret))
802             return ret;
803         return merge_const_set_entries(const_tab, param, index);
804     }
805 
806     TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
807             debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
808     TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
809             debugstr_a(param->name), param->rows, param->columns, param->class,
810             param->flags, param->bytes);
811 
812     const_set.element_count = 1;
813     const_set.param = param;
814     const_set.constant_class = desc.Class;
815     if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
816     {
817         FIXME("Unknown register set %u.\n", desc.RegisterSet);
818         return D3DERR_INVALIDCALL;
819     }
820     const_set.register_index = desc.RegisterIndex;
821     const_set.table = const_tab->regset2table[desc.RegisterSet];
822     if (const_set.table >= PRES_REGTAB_COUNT)
823     {
824         ERR("Unexpected register set %u.\n", desc.RegisterSet);
825         return D3DERR_INVALIDCALL;
826     }
827     assert(table_info[const_set.table].component_size == sizeof(unsigned int));
828     assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int));
829     const_set.register_count = desc.RegisterCount;
830     table_type = table_info[const_set.table].type;
831     get_const_upload_info(&const_set, &info);
832     if (!info.count)
833     {
834         TRACE("%s has zero count, skipping.\n", debugstr_a(param->name));
835         return D3D_OK;
836     }
837 
838     if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
839         return D3DERR_INVALIDCALL;
840 
841     const_set.direct_copy = table_type_from_param_type(param->type) == table_type
842             && !info.transpose && info.minor == info.major_stride
843             && info.count == get_offset_reg(const_set.table, const_set.register_count)
844             && info.count * sizeof(unsigned int) <= param->bytes;
845     if (info.minor_remainder && !const_set.direct_copy && !info.transpose)
846         FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n",
847                 debugstr_a(param->name));
848 
849     if (info.major_count > info.major
850             || (info.major_count == info.major && info.minor_remainder))
851     {
852         WARN("Constant dimensions exceed parameter size.\n");
853         return D3DERR_INVALIDCALL;
854     }
855 
856     if (FAILED(hr = append_const_set(const_tab, &const_set)))
857         return hr;
858 
859     return D3D_OK;
860 }
861 
862 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out,
863         struct d3dx_effect *effect, const char **skip_constants,
864         unsigned int skip_constants_count, struct d3dx_preshader *pres)
865 {
866     ID3DXConstantTable *ctab;
867     D3DXCONSTANT_DESC *cdesc;
868     struct d3dx_parameter **inputs_param;
869     D3DXCONSTANTTABLE_DESC desc;
870     HRESULT hr;
871     D3DXHANDLE hc;
872     unsigned int i, j;
873 
874     hr = D3DXGetShaderConstantTable(byte_code, &ctab);
875     if (FAILED(hr) || !ctab)
876     {
877         TRACE("Could not get CTAB data, hr %#x.\n", hr);
878         /* returning OK, shaders and preshaders without CTAB are valid */
879         return D3D_OK;
880     }
881     if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
882     {
883         FIXME("Could not get CTAB desc, hr %#x.\n", hr);
884         goto cleanup;
885     }
886 
887     out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
888     out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
889     if (!cdesc || !inputs_param)
890     {
891         hr = E_OUTOFMEMORY;
892         goto cleanup;
893     }
894 
895     for (i = 0; i < desc.Constants; ++i)
896     {
897         unsigned int index = out->input_count;
898         WORD constantinfo_reserved;
899 
900         hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
901         if (!hc)
902         {
903             FIXME("Null constant handle.\n");
904             goto cleanup;
905         }
906         if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved)))
907             goto cleanup;
908         inputs_param[index] = get_parameter_by_name(effect, NULL, cdesc[index].Name);
909         if (!inputs_param[index])
910         {
911             WARN("Could not find parameter %s in effect.\n", cdesc[index].Name);
912             continue;
913         }
914         if (cdesc[index].Class == D3DXPC_OBJECT)
915         {
916             TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]);
917             if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT
918                     || !is_param_type_sampler(inputs_param[index]->type))
919             {
920                 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name));
921                 hr = D3DERR_INVALIDCALL;
922                 goto cleanup;
923             }
924             if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount)
925             {
926                 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name));
927                 hr = D3DERR_INVALIDCALL;
928                 goto cleanup;
929             }
930         }
931         if (!is_top_level_parameter(inputs_param[index]))
932         {
933             WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name));
934             hr = E_FAIL;
935             goto cleanup;
936         }
937 
938         for (j = 0; j < skip_constants_count; ++j)
939         {
940             if (!strcmp(cdesc[index].Name, skip_constants[j]))
941             {
942                 if (!constantinfo_reserved)
943                 {
944                     WARN("skip_constants parameter %s is not register bound.\n",
945                             cdesc[index].Name);
946                     hr = D3DERR_INVALIDCALL;
947                     goto cleanup;
948                 }
949                 TRACE("Skipping constant %s.\n", cdesc[index].Name);
950                 break;
951             }
952         }
953         if (j < skip_constants_count)
954             continue;
955         ++out->input_count;
956         if (inputs_param[index]->class == D3DXPC_OBJECT)
957             continue;
958         if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index])))
959             goto cleanup;
960     }
961     if (pres)
962         append_pres_const_sets_for_shader_input(out, pres);
963     if (out->const_set_count)
964     {
965         struct d3dx_const_param_eval_output *new_alloc;
966 
967         qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set);
968 
969         i = 0;
970         while (i < out->const_set_count - 1)
971         {
972             if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD
973                     && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD
974                     && out->const_set[i].table == out->const_set[i + 1].table
975                     && out->const_set[i].register_index + out->const_set[i].register_count
976                     >= out->const_set[i + 1].register_index)
977             {
978                 assert(out->const_set[i].register_index + out->const_set[i].register_count
979                         <= out->const_set[i + 1].register_index + 1);
980                 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1
981                         - out->const_set[i].register_index;
982                 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i])
983                         * (out->const_set_count - i - 2));
984                 --out->const_set_count;
985             }
986             else
987             {
988                 ++i;
989             }
990         }
991 
992         new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set,
993                 sizeof(*out->const_set) * out->const_set_count);
994         if (new_alloc)
995         {
996             out->const_set = new_alloc;
997             out->const_set_size = out->const_set_count;
998         }
999         else
1000         {
1001             WARN("Out of memory.\n");
1002         }
1003     }
1004 cleanup:
1005     ID3DXConstantTable_Release(ctab);
1006     return hr;
1007 }
1008 
1009 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
1010 {
1011     if (table < PRES_REGTAB_COUNT)
1012         table_sizes[table] = max(table_sizes[table], max_register + 1);
1013 }
1014 
1015 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
1016 {
1017     unsigned int i, table, max_register;
1018 
1019     for (i = 0; i < ctab->input_count; ++i)
1020     {
1021         if (!ctab->inputs[i].RegisterCount)
1022             continue;
1023         max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
1024         table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1025         update_table_size(table_sizes, table, max_register);
1026     }
1027 }
1028 
1029 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
1030 {
1031     static const char *xyzw_str = "xyzw";
1032     unsigned int i, table;
1033 
1034     table = arg->reg.table;
1035     if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT)
1036     {
1037         TRACE("(");
1038         for (i = 0; i < component_count; ++i)
1039             TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
1040                     ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]);
1041         TRACE(")");
1042     }
1043     else
1044     {
1045         if (arg->index_reg.table == PRES_REGTAB_COUNT)
1046         {
1047             TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset));
1048         }
1049         else
1050         {
1051             unsigned int index_reg;
1052 
1053             index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset);
1054             TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset),
1055                     table_symbol[arg->index_reg.table], index_reg,
1056                     xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]);
1057         }
1058         for (i = 0; i < component_count; ++i)
1059             TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]);
1060     }
1061 }
1062 
1063 static void dump_registers(struct d3dx_const_tab *ctab)
1064 {
1065     unsigned int table, i;
1066 
1067     for (i = 0; i < ctab->input_count; ++i)
1068     {
1069         table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1070         TRACE("//   %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
1071                 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
1072     }
1073 }
1074 
1075 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
1076 {
1077     unsigned int i;
1078 
1079     TRACE("%s ", pres_op_info[ins->op].mnem);
1080     dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
1081     for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
1082     {
1083         TRACE(", ");
1084         dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
1085     }
1086     TRACE("\n");
1087 }
1088 
1089 static void dump_preshader(struct d3dx_preshader *pres)
1090 {
1091     unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4;
1092     const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED];
1093 
1094     if (immediate_count)
1095         TRACE("// Immediates:\n");
1096     for (i = 0; i < immediate_count; ++i)
1097     {
1098         if (!(i % 4))
1099             TRACE("// ");
1100         TRACE("%.8e", immediates[i]);
1101         if (i % 4 == 3)
1102             TRACE("\n");
1103         else
1104             TRACE(", ");
1105     }
1106     TRACE("// Preshader registers:\n");
1107     dump_registers(&pres->inputs);
1108     TRACE("preshader\n");
1109     for (i = 0; i < pres->ins_count; ++i)
1110         dump_ins(&pres->regs, &pres->ins[i]);
1111 }
1112 
1113 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx_effect *effect)
1114 {
1115     unsigned int *p;
1116     unsigned int i, j, const_count;
1117     double *dconst;
1118     HRESULT hr;
1119     unsigned int saved_word;
1120     unsigned int section_size;
1121 
1122     TRACE("Preshader version %#x.\n", *ptr & 0xffff);
1123 
1124     if (!count)
1125     {
1126         WARN("Unexpected end of byte code buffer.\n");
1127         return D3DXERR_INVALIDDATA;
1128     }
1129 
1130     p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
1131     if (p)
1132     {
1133         const_count = *p++;
1134         if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
1135         {
1136             WARN("Byte code buffer ends unexpectedly.\n");
1137             return D3DXERR_INVALIDDATA;
1138         }
1139         dconst = (double *)p;
1140     }
1141     else
1142     {
1143         const_count = 0;
1144         dconst = NULL;
1145     }
1146     TRACE("%u double constants.\n", const_count);
1147 
1148     p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
1149     if (!p)
1150     {
1151         WARN("Could not find preshader code.\n");
1152         return D3D_OK;
1153     }
1154     pres->ins_count = *p++;
1155     --section_size;
1156     if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
1157     {
1158         WARN("Invalid instruction count %u.\n", pres->ins_count);
1159         return D3DXERR_INVALIDDATA;
1160     }
1161     TRACE("%u instructions.\n", pres->ins_count);
1162     pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
1163     if (!pres->ins)
1164         return E_OUTOFMEMORY;
1165     for (i = 0; i < pres->ins_count; ++i)
1166     {
1167         unsigned int *ptr_next;
1168 
1169         ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
1170         if (!ptr_next)
1171             return D3DXERR_INVALIDDATA;
1172         section_size -= ptr_next - p;
1173         p = ptr_next;
1174     }
1175 
1176     pres->inputs.regset2table = pres_regset2table;
1177 
1178     saved_word = *ptr;
1179     *ptr = 0xfffe0000;
1180     hr = get_constants_desc(ptr, &pres->inputs, effect, NULL, 0, NULL);
1181     *ptr = saved_word;
1182     if (FAILED(hr))
1183         return hr;
1184 
1185     if (const_count % get_reg_components(PRES_REGTAB_IMMED))
1186     {
1187         FIXME("const_count %u is not a multiple of %u.\n", const_count,
1188                 get_reg_components(PRES_REGTAB_IMMED));
1189         return D3DXERR_INVALIDDATA;
1190     }
1191     pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count);
1192 
1193     update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
1194     for (i = 0; i < pres->ins_count; ++i)
1195     {
1196         for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
1197         {
1198             enum pres_reg_tables table;
1199             unsigned int reg_idx;
1200 
1201             if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT)
1202             {
1203                 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0
1204                         : pres->ins[i].component_count - 1;
1205 
1206                 table = pres->ins[i].inputs[j].reg.table;
1207                 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset
1208                         + last_component_index);
1209             }
1210             else
1211             {
1212                 table = pres->ins[i].inputs[j].index_reg.table;
1213                 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset);
1214             }
1215             if (reg_idx >= pres->regs.table_sizes[table])
1216             {
1217                 /* Native accepts these broken preshaders. */
1218                 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n",
1219                         i, j, table, reg_idx);
1220                 return D3DXERR_INVALIDDATA;
1221             }
1222         }
1223         update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table,
1224                 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset));
1225     }
1226     if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
1227         return E_OUTOFMEMORY;
1228     regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);
1229 
1230     return D3D_OK;
1231 }
1232 
1233 HRESULT d3dx_create_param_eval(struct d3dx_effect *effect, void *byte_code, unsigned int byte_code_size,
1234         D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter,
1235         const char **skip_constants, unsigned int skip_constants_count)
1236 {
1237     struct d3dx_param_eval *peval;
1238     unsigned int *ptr, *shader_ptr = NULL;
1239     unsigned int i;
1240     BOOL shader;
1241     unsigned int count, pres_size;
1242     HRESULT ret;
1243 
1244     TRACE("effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
1245             effect, byte_code, byte_code_size, type, peval_out);
1246 
1247     count = byte_code_size / sizeof(unsigned int);
1248     if (!byte_code || !count)
1249     {
1250         *peval_out = NULL;
1251         return D3D_OK;
1252     }
1253 
1254     peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
1255     if (!peval)
1256     {
1257         ret = E_OUTOFMEMORY;
1258         goto err_out;
1259     }
1260     peval->version_counter = version_counter;
1261 
1262     peval->param_type = type;
1263     switch (type)
1264     {
1265         case D3DXPT_VERTEXSHADER:
1266         case D3DXPT_PIXELSHADER:
1267             shader = TRUE;
1268             break;
1269         default:
1270             shader = FALSE;
1271             break;
1272     }
1273     peval->shader_inputs.regset2table = shad_regset2table;
1274 
1275     ptr = (unsigned int *)byte_code;
1276     if (shader)
1277     {
1278         if ((*ptr & 0xfffe0000) != 0xfffe0000)
1279         {
1280             FIXME("Invalid shader signature %#x.\n", *ptr);
1281             ret = D3DXERR_INVALIDDATA;
1282             goto err_out;
1283         }
1284         TRACE("Shader version %#x.\n", *ptr & 0xffff);
1285         shader_ptr = ptr;
1286         ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
1287         if (!ptr)
1288             TRACE("No preshader found.\n");
1289     }
1290     else
1291     {
1292         pres_size = count;
1293     }
1294 
1295     if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, effect)))
1296     {
1297         FIXME("Failed parsing preshader, byte code for analysis follows.\n");
1298         dump_bytecode(byte_code, byte_code_size);
1299         goto err_out;
1300     }
1301 
1302     if (shader)
1303     {
1304         if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, effect,
1305                 skip_constants, skip_constants_count, &peval->pres)))
1306         {
1307             TRACE("Could not get shader constant table, hr %#x.\n", ret);
1308             goto err_out;
1309         }
1310         update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
1311     }
1312 
1313     for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
1314     {
1315         if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i)))
1316             goto err_out;
1317     }
1318 
1319     if (TRACE_ON(d3dx))
1320     {
1321         dump_bytecode(byte_code, byte_code_size);
1322         dump_preshader(&peval->pres);
1323         if (shader)
1324         {
1325             TRACE("// Shader registers:\n");
1326             dump_registers(&peval->shader_inputs);
1327         }
1328     }
1329     *peval_out = peval;
1330     TRACE("Created parameter evaluator %p.\n", *peval_out);
1331     return D3D_OK;
1332 
1333 err_out:
1334     WARN("Error creating parameter evaluator.\n");
1335     if (TRACE_ON(d3dx))
1336         dump_bytecode(byte_code, byte_code_size);
1337 
1338     d3dx_free_param_eval(peval);
1339     *peval_out = NULL;
1340     return ret;
1341 }
1342 
1343 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
1344 {
1345     HeapFree(GetProcessHeap(), 0, ctab->inputs);
1346     HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
1347     HeapFree(GetProcessHeap(), 0, ctab->const_set);
1348 }
1349 
1350 static void d3dx_free_preshader(struct d3dx_preshader *pres)
1351 {
1352     HeapFree(GetProcessHeap(), 0, pres->ins);
1353 
1354     regstore_free_tables(&pres->regs);
1355     d3dx_free_const_tab(&pres->inputs);
1356 }
1357 
1358 void d3dx_free_param_eval(struct d3dx_param_eval *peval)
1359 {
1360     TRACE("peval %p.\n", peval);
1361 
1362     if (!peval)
1363         return;
1364 
1365     d3dx_free_preshader(&peval->pres);
1366     d3dx_free_const_tab(&peval->shader_inputs);
1367     HeapFree(GetProcessHeap(), 0, peval);
1368 }
1369 
/* Converts 'count' float values at 'in' to int values at 'out' using the
 * regular C float to int conversion. */
static void pres_int_from_float(void *out, const void *in, unsigned int count)
{
    const float *src = in;
    int *dst = out;

    while (count--)
        *dst++ = *src++;
}
1379 
1380 static void pres_bool_from_value(void *out, const void *in, unsigned int count)
1381 {
1382     unsigned int i;
1383     const DWORD *in_dword = in;
1384     BOOL *out_bool = out;
1385 
1386     for (i = 0; i < count; ++i)
1387         out_bool[i] = !!in_dword[i];
1388 }
1389 
/* Converts 'count' int values at 'in' to float values at 'out'. */
static void pres_float_from_int(void *out, const void *in, unsigned int count)
{
    const int *src = in;
    float *dst = out;

    while (count--)
        *dst++ = *src++;
}
1399 
1400 static void pres_float_from_bool(void *out, const void *in, unsigned int count)
1401 {
1402     unsigned int i;
1403     const BOOL *in_bool = in;
1404     float *out_float = out;
1405 
1406     for (i = 0; i < count; ++i)
1407         out_float[i] = !!in_bool[i];
1408 }
1409 
/* Converts 'count' boolean values at 'in' to int values at 'out': any
 * non-zero word becomes 1, zero stays 0.
 *
 * The input is read as raw 32 bit words (regstore_set_data() passes its
 * source as 'const unsigned int *').  Reading it as float would test the
 * float value instead of the bit pattern, so that e.g. the word 0x80000000
 * (-0.0f) would wrongly convert to 0. */
static void pres_int_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const unsigned int *in_bool = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = !!in_bool[i];
}
1419 
1420 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table,
1421         unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type)
1422 {
1423     typedef void (*conv_func)(void *out, const void *in, unsigned int count);
1424     static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] =
1425     {
1426         {NULL,                 NULL, pres_int_from_float, pres_bool_from_value},
1427         {NULL,                 NULL, NULL,                NULL},
1428         {pres_float_from_int,  NULL, NULL,                pres_bool_from_value},
1429         {pres_float_from_bool, NULL, pres_int_from_bool,  NULL}
1430     };
1431     enum pres_value_type table_type = table_info[table].type;
1432 
1433     if (param_type == table_type)
1434     {
1435         regstore_set_values(rs, table, in, offset, count);
1436         return;
1437     }
1438 
1439     set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count);
1440 }
1441 
1442 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1443         D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr,
1444         unsigned int start, unsigned int count)
1445 {
1446     if (type == D3DXPT_VERTEXSHADER)
1447     {
1448         switch(table)
1449         {
1450             case PRES_REGTAB_OCONST:
1451                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count);
1452             case PRES_REGTAB_OICONST:
1453                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count);
1454             case PRES_REGTAB_OBCONST:
1455                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count);
1456             default:
1457                 FIXME("Unexpected register table %u.\n", table);
1458                 return D3DERR_INVALIDCALL;
1459         }
1460     }
1461     else if (type == D3DXPT_PIXELSHADER)
1462     {
1463         switch(table)
1464         {
1465             case PRES_REGTAB_OCONST:
1466                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count);
1467             case PRES_REGTAB_OICONST:
1468                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count);
1469             case PRES_REGTAB_OBCONST:
1470                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count);
1471             default:
1472                 FIXME("Unexpected register table %u.\n", table);
1473                 return D3DERR_INVALIDCALL;
1474         }
1475     }
1476     else
1477     {
1478         FIXME("Unexpected parameter type %u.\n", type);
1479         return D3DERR_INVALIDCALL;
1480     }
1481 }
1482 
1483 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
1484         ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1485         D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty)
1486 {
1487     unsigned int const_idx;
1488     unsigned int current_start = 0, current_count = 0;
1489     enum pres_reg_tables current_table = PRES_REGTAB_COUNT;
1490     BOOL update_device = manager || device;
1491     HRESULT hr, result = D3D_OK;
1492     ULONG64 update_version = const_tab->update_version;
1493 
1494     for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1495     {
1496         struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1497         enum pres_reg_tables table = const_set->table;
1498         struct d3dx_parameter *param = const_set->param;
1499         unsigned int element, i, j, start_offset;
1500         struct const_upload_info info;
1501         unsigned int *data;
1502         enum pres_value_type param_type;
1503 
1504         if (!(param && is_param_dirty(param, update_version)))
1505             continue;
1506 
1507         data = param->data;
1508         start_offset = get_offset_reg(table, const_set->register_index);
1509         if (const_set->direct_copy)
1510         {
1511             regstore_set_values(rs, table, data, start_offset,
1512                     get_offset_reg(table, const_set->register_count));
1513             continue;
1514         }
1515         param_type = table_type_from_param_type(param->type);
1516         if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR)
1517         {
1518             unsigned int count = max(param->rows, param->columns);
1519 
1520             if (count >= get_reg_components(table))
1521             {
1522                 regstore_set_data(rs, table, start_offset, data,
1523                         count * const_set->element_count, param_type);
1524             }
1525             else
1526             {
1527                 for (element = 0; element < const_set->element_count; ++element)
1528                     regstore_set_data(rs, table, start_offset + get_offset_reg(table, element),
1529                             &data[element * count], count, param_type);
1530             }
1531             continue;
1532         }
1533         get_const_upload_info(const_set, &info);
1534         for (element = 0; element < const_set->element_count; ++element)
1535         {
1536             unsigned int *out = (unsigned int *)rs->tables[table] + start_offset;
1537 
1538             /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer.
1539              * All the supported types of parameters and table values have the same size. */
1540             if (info.transpose)
1541             {
1542                 for (i = 0; i < info.major_count; ++i)
1543                     for (j = 0; j < info.minor; ++j)
1544                         out[i * info.major_stride + j] = data[i + j * info.major];
1545 
1546                 for (j = 0; j < info.minor_remainder; ++j)
1547                     out[i * info.major_stride + j] = data[i + j * info.major];
1548             }
1549             else
1550             {
1551                 for (i = 0; i < info.major_count; ++i)
1552                     for (j = 0; j < info.minor; ++j)
1553                         out[i * info.major_stride + j] = data[i * info.minor + j];
1554             }
1555             start_offset += get_offset_reg(table, const_set->register_count);
1556             data += param->rows * param->columns;
1557         }
1558         start_offset = get_offset_reg(table, const_set->register_index);
1559         if (table_info[table].type != param_type)
1560             regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset,
1561                     get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type);
1562     }
1563     const_tab->update_version = new_update_version;
1564     if (!update_device)
1565         return D3D_OK;
1566 
1567     for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1568     {
1569         struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1570 
1571         if (device_update_all || (const_set->param
1572                 ? is_param_dirty(const_set->param, update_version) : pres_dirty))
1573         {
1574             enum pres_reg_tables table = const_set->table;
1575 
1576             if (table == current_table && current_start + current_count == const_set->register_index)
1577             {
1578                 current_count += const_set->register_count * const_set->element_count;
1579             }
1580             else
1581             {
1582                 if (current_count)
1583                 {
1584                     if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1585                             (DWORD *)rs->tables[current_table]
1586                             + get_offset_reg(current_table, current_start), current_start, current_count)))
1587                         result = hr;
1588                 }
1589                 current_table = table;
1590                 current_start = const_set->register_index;
1591                 current_count = const_set->register_count * const_set->element_count;
1592             }
1593         }
1594     }
1595     if (current_count)
1596     {
1597         if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1598                 (DWORD *)rs->tables[current_table]
1599                 + get_offset_reg(current_table, current_start), current_start, current_count)))
1600             result = hr;
1601     }
1602     return result;
1603 }
1604 
1605 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset)
1606 {
1607     return regstore_get_double(rs, table, offset);
1608 }
1609 
1610 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp)
1611 {
1612     unsigned int offset, base_index, reg_index, table;
1613 
1614     table = opr->reg.table;
1615 
1616     if (opr->index_reg.table == PRES_REGTAB_COUNT)
1617         base_index = 0;
1618     else
1619         base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset));
1620 
1621     offset = get_offset_reg(table, base_index) + opr->reg.offset + comp;
1622     reg_index = get_reg_offset(table, offset);
1623 
1624     if (reg_index >= rs->table_sizes[table])
1625     {
1626         unsigned int wrap_size;
1627 
1628         if (table == PRES_REGTAB_CONST)
1629         {
1630             /* As it can be guessed from tests, offset into floating constant table is wrapped
1631              * to the nearest power of 2 and not to the actual table size. */
1632             for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1)
1633                 ;
1634         }
1635         else
1636         {
1637             wrap_size = rs->table_sizes[table];
1638         }
1639         WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n",
1640                 reg_index, table, wrap_size, rs->table_sizes[table]);
1641         reg_index %= wrap_size;
1642 
1643         if (reg_index >= rs->table_sizes[table])
1644             return 0.0;
1645 
1646         offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table);
1647     }
1648 
1649     return exec_get_reg_value(rs, table, offset);
1650 }
1651 
1652 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg,
1653         unsigned int comp, double res)
1654 {
1655     regstore_set_double(rs, reg->table, reg->offset + comp, res);
1656 }
1657 
1658 #define ARGS_ARRAY_SIZE 8
1659 static HRESULT execute_preshader(struct d3dx_preshader *pres)
1660 {
1661     unsigned int i, j, k;
1662     double args[ARGS_ARRAY_SIZE];
1663     double res;
1664 
1665     for (i = 0; i < pres->ins_count; ++i)
1666     {
1667         const struct d3dx_pres_ins *ins;
1668         const struct op_info *oi;
1669 
1670         ins = &pres->ins[i];
1671         oi = &pres_op_info[ins->op];
1672         if (oi->func_all_comps)
1673         {
1674             if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
1675             {
1676                 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
1677                 return E_FAIL;
1678             }
1679             for (k = 0; k < oi->input_count; ++k)
1680                 for (j = 0; j < ins->component_count; ++j)
1681                     args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k],
1682                             ins->scalar_op && !k ? 0 : j);
1683             res = oi->func(args, ins->component_count);
1684 
1685             /* only 'dot' instruction currently falls here */
1686             exec_set_arg(&pres->regs, &ins->output.reg, 0, res);
1687         }
1688         else
1689         {
1690             for (j = 0; j < ins->component_count; ++j)
1691             {
1692                 for (k = 0; k < oi->input_count; ++k)
1693                     args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j);
1694                 res = oi->func(args, ins->component_count);
1695                 exec_set_arg(&pres->regs, &ins->output.reg, j, res);
1696             }
1697         }
1698     }
1699     return D3D_OK;
1700 }
1701 
1702 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version)
1703 {
1704     unsigned int i;
1705 
1706     if (update_version == ULONG64_MAX)
1707         update_version = ctab->update_version;
1708     for (i = 0; i < ctab->input_count; ++i)
1709     {
1710         if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]),
1711                 update_version))
1712             return TRUE;
1713     }
1714     return FALSE;
1715 }
1716 
1717 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version)
1718 {
1719     return is_const_tab_input_dirty(&peval->pres.inputs, update_version)
1720             || is_const_tab_input_dirty(&peval->shader_inputs, update_version);
1721 }
1722 
1723 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param,
1724         void *param_value)
1725 {
1726     HRESULT hr;
1727     unsigned int i;
1728     unsigned int elements, elements_param, elements_table;
1729     float *oc;
1730 
1731     TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
1732 
1733     if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX))
1734     {
1735         set_constants(&peval->pres.regs, &peval->pres.inputs,
1736                 next_update_version(peval->version_counter),
1737                 NULL, NULL, peval->param_type, FALSE, FALSE);
1738 
1739         if (FAILED(hr = execute_preshader(&peval->pres)))
1740             return hr;
1741     }
1742 
1743     elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]);
1744     elements_param = param->bytes / sizeof(unsigned int);
1745     elements = min(elements_table, elements_param);
1746     oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
1747     for (i = 0; i < elements; ++i)
1748         set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
1749     return D3D_OK;
1750 }
1751 
1752 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1753         struct d3dx_param_eval *peval, BOOL update_all)
1754 {
1755     HRESULT hr;
1756     struct d3dx_preshader *pres = &peval->pres;
1757     struct d3dx_regstore *rs = &pres->regs;
1758     ULONG64 new_update_version = next_update_version(peval->version_counter);
1759     BOOL pres_dirty = FALSE;
1760 
1761     TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
1762 
1763     if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX))
1764     {
1765         set_constants(rs, &pres->inputs, new_update_version,
1766                 NULL, NULL, peval->param_type, FALSE, FALSE);
1767         if (FAILED(hr = execute_preshader(pres)))
1768             return hr;
1769         pres_dirty = TRUE;
1770     }
1771 
1772     return set_constants(rs, &peval->shader_inputs, new_update_version,
1773             manager, device, peval->param_type, update_all, pres_dirty);
1774 }
1775