xref: /reactos/dll/directx/wine/d3dx9_36/preshader.c (revision b8dd046e)
1 /*
2  * Copyright 2016 Paul Gofman
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17  */
18 
19 #include "config.h"
20 #include "wine/port.h"
21 
22 #include "d3dx9_private.h"
23 
24 #include <float.h>
25 #include <assert.h>
26 
27 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
28 
#ifdef __REACTOS__
/* ReactOS FIXME: Insect */
/* ReactOS builds route the fmin()/fmax() calls made by pres_min()/pres_max()
 * to min/max instead.
 * NOTE(review): min/max are not defined in this file; if they are plain
 * comparison macros their NaN handling differs from C99 fmin()/fmax() - confirm. */
#define fmin min
#define fmax max
#endif
34 
/* Preshader operations.
 * The enumerators double as row indices into pres_op_info[]: parse_pres_ins()
 * stores the index of the matching pres_op_info[] row in d3dx_pres_ins.op, so
 * the order here must stay in sync with the order of that table. */
enum pres_ops
{
    PRESHADER_OP_NOP,
    PRESHADER_OP_MOV,
    PRESHADER_OP_NEG,
    PRESHADER_OP_RCP,
    PRESHADER_OP_FRC,
    PRESHADER_OP_EXP,
    PRESHADER_OP_LOG,
    PRESHADER_OP_RSQ,
    PRESHADER_OP_SIN,
    PRESHADER_OP_COS,
    PRESHADER_OP_ASIN,
    PRESHADER_OP_ACOS,
    PRESHADER_OP_ATAN,
    PRESHADER_OP_MIN,
    PRESHADER_OP_MAX,
    PRESHADER_OP_LT,
    PRESHADER_OP_GE,
    PRESHADER_OP_ADD,
    PRESHADER_OP_MUL,
    PRESHADER_OP_ATAN2,
    PRESHADER_OP_DIV,
    PRESHADER_OP_CMP,
    PRESHADER_OP_DOT,
    /* The two dotswiz variants share one opcode and differ only in input count. */
    PRESHADER_OP_DOTSWIZ6,
    PRESHADER_OP_DOTSWIZ8,
};
63 
/* Evaluator for one preshader operation: 'args' holds the input values and the
 * result is a single double. The meaning of 'n' is operation specific; of the
 * evaluators defined right below only pres_dot() reads it (as the vector length). */
typedef double (*pres_op_func)(double *args, int n);
65 
66 static double to_signed_nan(double v)
67 {
68     static const union
69     {
70         ULONG64 ulong64_value;
71         double double_value;
72     }
73     signed_nan =
74     {
75         0xfff8000000000000
76     };
77 
78     return isnan(v) ? signed_nan.double_value : v;
79 }
80 
/* mov: returns the first input unchanged. */
static double pres_mov(double *args, int n)
{
    return args[0];
}

/* add: sum of the first two inputs. */
static double pres_add(double *args, int n)
{
    const double lhs = args[0];
    const double rhs = args[1];

    return lhs + rhs;
}

/* mul: product of the first two inputs. */
static double pres_mul(double *args, int n)
{
    const double lhs = args[0];
    const double rhs = args[1];

    return lhs * rhs;
}
/* dot: dot product of two n-component vectors stored back to back in args
 * (first vector in args[0..n-1], second in args[n..2n-1]). Returns 0.0 when
 * n is not positive. */
static double pres_dot(double *args, int n)
{
    const double *lhs = args;
    const double *rhs = args + n;
    double acc = 0.0;
    int left;

    for (left = n; left > 0; --left)
        acc += *lhs++ * *rhs++;
    return acc;
}
94 
/* d3ds_dotswiz with six inputs: dot product of two 3-component vectors.
 * The 'n' argument is ignored. */
static double pres_dotswiz6(double *args, int n)
{
    enum { vector_length = 3 };

    return pres_dot(args, vector_length);
}

/* d3ds_dotswiz with eight inputs: dot product of two 4-component vectors.
 * The 'n' argument is ignored. */
static double pres_dotswiz8(double *args, int n)
{
    enum { vector_length = 4 };

    return pres_dot(args, vector_length);
}
104 
/* neg: arithmetic negation of the first input. */
static double pres_neg(double *args, int n)
{
    return -args[0];
}

/* rcp: reciprocal of the first input. */
static double pres_rcp(double *args, int n)
{
    return 1.0 / args[0];
}

/* lt: 1.0 when args[0] < args[1], otherwise 0.0. */
static double pres_lt(double *args, int n)
{
    if (args[0] < args[1])
        return 1.0;
    return 0.0;
}

/* ge: 1.0 when args[0] >= args[1], otherwise 0.0. */
static double pres_ge(double *args, int n)
{
    if (args[0] >= args[1])
        return 1.0;
    return 0.0;
}
109 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
110 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
111 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
112 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];}
113 static double pres_sin(double *args, int n) {return sin(args[0]);}
114 static double pres_cos(double *args, int n) {return cos(args[0]);}
115 static double pres_rsq(double *args, int n)
116 {
117     double v;
118 
119     v = fabs(args[0]);
120     if (v == 0.0)
121         return INFINITY;
122     else
123         return 1.0 / sqrt(v);
124 }
125 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
126 static double pres_log(double *args, int n)
127 {
128     double v;
129 
130     v = fabs(args[0]);
131     if (v == 0.0)
132         return 0.0;
133     else
134 #ifdef HAVE_LOG2
135         return log2(v);
136 #else
137         return log(v) / log(2);
138 #endif
139 }
/* asin: arcsine of the first input; a NaN result is normalized by to_signed_nan(). */
static double pres_asin(double *args, int n)
{
    const double angle = asin(args[0]);

    return to_signed_nan(angle);
}

/* acos: arccosine of the first input; a NaN result is normalized by to_signed_nan(). */
static double pres_acos(double *args, int n)
{
    const double angle = acos(args[0]);

    return to_signed_nan(angle);
}
142 static double pres_atan(double *args, int n) {return atan(args[0]);}
143 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);}
144 
/* div: deliberately a stub that ignores its inputs and yields 0. Tests show the
 * 'div' operation always returns 0; the compiler does not seem to ever emit it
 * (it uses rcp + mul instead), so it is probably not implemented in native d3dx. */
static double pres_div(double *args, int n)
{
    return 0.0;
}
148 
/* Layout of the first word of a preshader instruction, as decoded by
 * parse_pres_ins(): bits 20..30 hold the opcode, bit 31 is the scalar flag and
 * the low 16 bits hold the component count. */
#define PRES_OPCODE_MASK 0x7ff00000
#define PRES_OPCODE_SHIFT 20
#define PRES_SCALAR_FLAG 0x80000000
#define PRES_NCOMP_MASK  0x0000ffff

/* Section tags; each value spells the ASCII name in its macro
 * ("PRES", "CLIT", "FXLC", "PRSI") when stored as a little-endian DWORD. */
#define FOURCC_PRES 0x53455250
#define FOURCC_CLIT 0x54494c43
#define FOURCC_FXLC 0x434c5846
#define FOURCC_PRSI 0x49535250
/* 0x4658 is ASCII "FX" in the upper 16 bits.
 * NOTE(review): the use of this signature is not visible in this part of the file. */
#define PRES_SIGN 0x46580000
159 
/* Static description of one preshader operation (one row of pres_op_info[]). */
struct op_info
{
    /* Opcode value matched against the opcode field of the instruction word. */
    unsigned int opcode;
    /* Mnemonic, used in diagnostic messages. */
    char mnem[16];
    /* Number of input operands; matched together with the opcode when parsing. */
    unsigned int input_count;
    /* When set, parse_pres_ins() does not extend the output register range
     * check by the instruction's component count. */
    BOOL func_all_comps;
    /* Evaluator for the operation; NULL for nop. */
    pres_op_func func;
};
168 
169 static const struct op_info pres_op_info[] =
170 {
171     {0x000, "nop", 0, 0, NULL    }, /* PRESHADER_OP_NOP */
172     {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
173     {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
174     {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
175     {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
176     {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
177     {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */
178     {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
179     {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
180     {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
181     {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */
182     {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */
183     {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */
184     {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
185     {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
186     {0x202, "lt",  2, 0, pres_lt }, /* PRESHADER_OP_LT  */
187     {0x203, "ge",  2, 0, pres_ge }, /* PRESHADER_OP_GE  */
188     {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
189     {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
190     {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */
191     {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */
192     {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
193     {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
194     {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
195     {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
196 };
197 
/* Storage type of the components held in a register table (see table_info[]). */
enum pres_value_type
{
    PRES_VT_FLOAT,
    PRES_VT_DOUBLE,
    PRES_VT_INT,
    PRES_VT_BOOL,
    /* Number of real types; table_type_from_param_type() also returns it to
     * signal an unsupported parameter type. */
    PRES_VT_COUNT
};
206 
207 static const struct
208 {
209     unsigned int component_size;
210     enum pres_value_type type;
211 }
212 table_info[] =
213 {
214     {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
215     {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
216     {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
217     {sizeof(BOOL),   PRES_VT_BOOL  }, /* PRES_REGTAB_OBCONST */
218     {sizeof(int),    PRES_VT_INT,  }, /* PRES_REGTAB_OICONST */
219     /* TODO: use double precision for 64 bit */
220     {sizeof(float),  PRES_VT_FLOAT }  /* PRES_REGTAB_TEMP */
221 };
222 
/* Short printable name for each register table, in the same order as
 * table_info[], followed by one extra "(null)" entry. */
static const char *table_symbol[] =
{
    "imm",
    "c",
    "oc",
    "ob",
    "oi",
    "r",
    "(null)",
};
227 
/* Maps a D3DXREGISTER_SET value to a register table; here float constants land
 * in PRES_REGTAB_CONST. PRES_REGTAB_COUNT marks a register set with no table.
 * NOTE(review): the name suggests this mapping is the one used for preshader
 * inputs - the selection code is not visible in this part of the file. */
static const enum pres_reg_tables pres_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_CONST,    /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
};
235 
/* Maps a D3DXREGISTER_SET value to a register table; here float constants land
 * in PRES_REGTAB_OCONST. PRES_REGTAB_COUNT marks a register set with no table.
 * init_set_constants_param() uses the size of this array to bound-check
 * desc.RegisterSet. */
static const enum pres_reg_tables shad_regset2table[] =
{
    PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
    PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
    PRES_REGTAB_OCONST,   /* D3DXRS_FLOAT4 */
    PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
};
243 
/* Location of a single component inside one of the register tables. */
struct d3dx_pres_reg
{
    /* Register table the component lives in. */
    enum pres_reg_tables table;
    /* offset is component index, not register index, e. g.
       offset for component c3.y is 13 (3 * 4 + 1) */
    unsigned int offset;
};
251 
/* One instruction operand as produced by parse_pres_arg(). */
struct d3dx_pres_operand
{
    /* The register being addressed. */
    struct d3dx_pres_reg reg;
    /* Register used for relative addressing; index_reg.table is set to
     * PRES_REGTAB_COUNT when the operand has no relative addressing. */
    struct d3dx_pres_reg index_reg;
};
257 
/* Capacity of d3dx_pres_ins.inputs; parse_pres_ins() rejects instructions
 * declaring more inputs than this. */
#define MAX_INPUTS_COUNT 8

/* One decoded preshader instruction, filled in by parse_pres_ins(). */
struct d3dx_pres_ins
{
    /* Index of the matching row in pres_op_info[]. */
    enum pres_ops op;
    /* first input argument is scalar,
       scalar component is propagated */
    BOOL scalar_op;
    /* Number of components, validated to be in the range 1..4 when parsing. */
    unsigned int component_count;
    /* Input operands; only the first pres_op_info[op].input_count are parsed. */
    struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
    /* Destination operand; relative addressing is rejected for it. */
    struct d3dx_pres_operand output;
};
270 
/* Layout information computed by get_const_upload_info() for copying one
 * parameter into constant registers. */
struct const_upload_info
{
    /* TRUE when one of parameter / constant is row-major and the other column-major. */
    BOOL transpose;
    /* Parameter dimensions seen from the constant's side: for a column-major
     * constant major = columns and minor = rows, otherwise major = rows and
     * minor = columns. */
    unsigned int major, minor;
    /* Components between consecutive major lines in the register table. */
    unsigned int major_stride;
    /* Number of complete major lines covered by the constant's registers. */
    unsigned int major_count;
    /* Total number of components: major_count * minor + minor_remainder. */
    unsigned int count;
    /* Components of a trailing incomplete major line; non-zero only for
     * tables with one component per register. */
    unsigned int minor_remainder;
};
280 
281 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type)
282 {
283     switch (type)
284     {
285         case D3DXPT_FLOAT:
286             return PRES_VT_FLOAT;
287         case D3DXPT_INT:
288             return PRES_VT_INT;
289         case D3DXPT_BOOL:
290             return PRES_VT_BOOL;
291         default:
292             FIXME("Unsupported type %u.\n", type);
293             return PRES_VT_COUNT;
294     }
295 }
296 
297 static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
298 {
299     return table == PRES_REGTAB_OBCONST ? offset : offset >> 2;
300 }
301 
302 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx)
303 {
304     return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2;
305 }
306 
307 static unsigned int get_reg_components(unsigned int table)
308 {
309     return get_offset_reg(table, 1);
310 }
311 
/* Number of bits in one unsigned int (assuming 8-bit bytes). */
#define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
313 
314 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
315 {
316     unsigned int size;
317 
318     size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size;
319     if (size)
320     {
321         rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
322         if (!rs->tables[table])
323             return E_OUTOFMEMORY;
324     }
325     return D3D_OK;
326 }
327 
328 static void regstore_free_tables(struct d3dx_regstore *rs)
329 {
330     unsigned int i;
331 
332     for (i = 0; i < PRES_REGTAB_COUNT; ++i)
333     {
334         HeapFree(GetProcessHeap(), 0, rs->tables[i]);
335     }
336 }
337 
338 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data,
339         unsigned int start_offset, unsigned int count)
340 {
341     BYTE *dst = rs->tables[table];
342     const BYTE *src = data;
343     unsigned int size;
344 
345     dst += start_offset * table_info[table].component_size;
346     size = count * table_info[table].component_size;
347     assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst));
348     memcpy(dst, src, size);
349 }
350 
351 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
352 {
353     BYTE *p;
354 
355     p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
356     switch (table_info[table].type)
357     {
358         case PRES_VT_FLOAT:
359             return *(float *)p;
360         case PRES_VT_DOUBLE:
361             return *(double *)p;
362         default:
363             FIXME("Unexpected preshader input from table %u.\n", table);
364             return NAN;
365     }
366 }
367 
368 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
369 {
370     BYTE *p;
371 
372     p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
373     switch (table_info[table].type)
374     {
375         case PRES_VT_FLOAT : *(float *)p = v; break;
376         case PRES_VT_DOUBLE: *(double *)p = v; break;
377         case PRES_VT_INT   : *(int *)p = lrint(v); break;
378         case PRES_VT_BOOL  : *(BOOL *)p = !!v; break;
379         default:
380             FIXME("Bad type %u.\n", table_info[table].type);
381             break;
382     }
383 }
384 
/* Traces a bytecode buffer as hexadecimal DWORDs, eight per line.
 * 'size' is in bytes; a trailing partial DWORD is not printed. */
static void dump_bytecode(void *data, unsigned int size)
{
    const unsigned int words_per_line = 8;
    unsigned int *words = (unsigned int *)data;
    unsigned int word_count = size / sizeof(*words);
    unsigned int pos;

    for (pos = 0; pos < word_count; ++pos)
    {
        TRACE("0x%08x,", words[pos]);
        /* Break the line after every eighth word and after the last one. */
        if ((pos + 1) % words_per_line == 0 || pos + 1 == word_count)
            TRACE("\n");
    }
}
401 
/* Walks a run of comment tokens looking for the section tagged 'fourcc'.
 *
 * A comment token has 0xfffe in its low 16 bits and the section length in
 * DWORDs (not counting the token itself) in its high 16 bits; the word right
 * after the token is the section tag.
 *
 * ptr    - first token to examine.
 * count  - number of DWORDs available at ptr.
 * fourcc - tag to look for.
 * size   - receives the section length (as stored in the token) on success.
 *
 * Returns a pointer to the word following the tag, or NULL when the section
 * is not found or the comment run ends or is malformed. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    /* Provide at least one value in comment section on non-NULL return. */
    while (count > 2)
    {
        const unsigned int token = ptr[0];
        unsigned int section_words;

        if ((token & 0xffff) != 0xfffe)
            break;

        section_words = token >> 16;
        if (!section_words || section_words + 1 > count)
            break;

        if (ptr[1] == fourcc)
        {
            *size = section_words;
            return ptr + 2;
        }

        /* Skip the token plus its payload. */
        ptr += section_words + 1;
        count -= section_words + 1;
    }
    return NULL;
}
423 
424 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg)
425 {
426     static const enum pres_reg_tables reg_table[8] =
427     {
428         PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
429         PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
430     };
431 
432     if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
433     {
434         FIXME("Unsupported register table %#x.\n", *ptr);
435         return NULL;
436     }
437 
438     reg->table = reg_table[*ptr++];
439     reg->offset = *ptr++;
440     return ptr;
441 }
442 
443 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
444 {
445     if (count < 3 || (*ptr && count < 5))
446     {
447         WARN("Byte code buffer ends unexpectedly, count %u.\n", count);
448         return NULL;
449     }
450 
451     if (*ptr)
452     {
453         if (*ptr != 1)
454         {
455             FIXME("Unknown relative addressing flag, word %#x.\n", *ptr);
456             return NULL;
457         }
458         ptr = parse_pres_reg(ptr + 1, &opr->index_reg);
459         if (!ptr)
460             return NULL;
461     }
462     else
463     {
464         opr->index_reg.table = PRES_REGTAB_COUNT;
465         ++ptr;
466     }
467 
468     ptr = parse_pres_reg(ptr, &opr->reg);
469 
470     if (opr->reg.table == PRES_REGTAB_OBCONST)
471         opr->reg.offset /= 4;
472     return ptr;
473 }
474 
475 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
476 {
477     unsigned int ins_code, ins_raw;
478     unsigned int input_count;
479     unsigned int i;
480 
481     if (count < 2)
482     {
483         WARN("Byte code buffer ends unexpectedly.\n");
484         return NULL;
485     }
486 
487     ins_raw = *ptr++;
488     ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
489     ins->component_count = ins_raw & PRES_NCOMP_MASK;
490     ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);
491 
492     if (ins->component_count < 1 || ins->component_count > 4)
493     {
494         FIXME("Unsupported number of components %u.\n", ins->component_count);
495         return NULL;
496     }
497     input_count = *ptr++;
498     count -= 2;
499     for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
500         if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
501             break;
502     if (i == ARRAY_SIZE(pres_op_info))
503     {
504         FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
505         return NULL;
506     }
507     ins->op = i;
508     if (input_count > ARRAY_SIZE(ins->inputs))
509     {
510         FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
511                 pres_op_info[i].mnem);
512         return NULL;
513     }
514     for (i = 0; i < input_count; ++i)
515     {
516         unsigned int *p;
517 
518         p = parse_pres_arg(ptr, count, &ins->inputs[i]);
519         if (!p)
520             return NULL;
521         count -= p - ptr;
522         ptr = p;
523     }
524     ptr = parse_pres_arg(ptr, count, &ins->output);
525     if (ins->output.index_reg.table != PRES_REGTAB_COUNT)
526     {
527         FIXME("Relative addressing in output register not supported.\n");
528         return NULL;
529     }
530     if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset
531             + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1))
532             != get_reg_offset(ins->output.reg.table, ins->output.reg.offset))
533     {
534         FIXME("Instructions outputting multiple registers are not supported.\n");
535         return NULL;
536     }
537     return ptr;
538 }
539 
540 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc,
541         WORD *constantinfo_reserved)
542 {
543     const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc);
544 
545     if (!constant)
546     {
547         FIXME("Could not get constant desc.\n");
548         if (constantinfo_reserved)
549             *constantinfo_reserved = 0;
550         return D3DERR_INVALIDCALL;
551     }
552     *desc = constant->desc;
553     if (constantinfo_reserved)
554         *constantinfo_reserved = constant->constantinfo_reserved;
555     return D3D_OK;
556 }
557 
558 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set,
559         struct const_upload_info *info)
560 {
561     struct d3dx_parameter *param = const_set->param;
562     unsigned int table = const_set->table;
563 
564     info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
565             || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
566     if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
567     {
568         info->major = param->columns;
569         info->minor = param->rows;
570     }
571     else
572     {
573         info->major = param->rows;
574         info->minor = param->columns;
575     }
576 
577     if (get_reg_components(table) == 1)
578     {
579         unsigned int const_length = get_offset_reg(table, const_set->register_count);
580 
581         info->major_stride = info->minor;
582         info->major_count = const_length / info->major_stride;
583         info->minor_remainder = const_length % info->major_stride;
584     }
585     else
586     {
587         info->major_stride = get_reg_components(table);
588         info->major_count = const_set->register_count;
589         info->minor_remainder = 0;
590     }
591     info->count = info->major_count * info->minor + info->minor_remainder;
592 }
593 
594 #define INITIAL_CONST_SET_SIZE 16
595 
596 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
597 {
598     if (const_tab->const_set_count >= const_tab->const_set_size)
599     {
600         unsigned int new_size;
601         struct d3dx_const_param_eval_output *new_alloc;
602 
603         if (!const_tab->const_set_size)
604         {
605             new_size = INITIAL_CONST_SET_SIZE;
606             new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size);
607             if (!new_alloc)
608             {
609                 ERR("Out of memory.\n");
610                 return E_OUTOFMEMORY;
611             }
612         }
613         else
614         {
615             new_size = const_tab->const_set_size * 2;
616             new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
617                     sizeof(*const_tab->const_set) * new_size);
618             if (!new_alloc)
619             {
620                 ERR("Out of memory.\n");
621                 return E_OUTOFMEMORY;
622             }
623         }
624         const_tab->const_set = new_alloc;
625         const_tab->const_set_size = new_size;
626     }
627     const_tab->const_set[const_tab->const_set_count++] = *set;
628     return D3D_OK;
629 }
630 
631 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab,
632         struct d3dx_preshader *pres)
633 {
634     unsigned int i;
635     struct d3dx_const_param_eval_output const_set = {NULL};
636 
637     for (i = 0; i < pres->ins_count; ++i)
638     {
639         const struct d3dx_pres_ins *ins = &pres->ins[i];
640         const struct d3dx_pres_reg *reg = &ins->output.reg;
641 
642         if (reg->table == PRES_REGTAB_TEMP)
643             continue;
644 
645         const_set.register_index = get_reg_offset(reg->table, reg->offset);
646         const_set.register_count = 1;
647         const_set.table = reg->table;
648         const_set.constant_class = D3DXPC_FORCE_DWORD;
649         const_set.element_count = 1;
650         append_const_set(const_tab, &const_set);
651     }
652 }
653 
654 static int compare_const_set(const void *a, const void *b)
655 {
656     const struct d3dx_const_param_eval_output *r1 = a;
657     const struct d3dx_const_param_eval_output *r2 = b;
658 
659     if (r1->table != r2->table)
660         return r1->table - r2->table;
661     return r1->register_index - r2->register_index;
662 }
663 
/* Merges runs of adjacent const set entries, starting at 'index', that can be
 * uploaded as a single entry.
 *
 * const_tab - table whose const_set array is compacted in place.
 * param     - parent parameter whose children produced the entries from
 *             'index' on; used for tracing and as replacement parameter for a
 *             merged direct copy run.
 * index     - first entry to consider.
 *
 * Returns D3D_OK, or D3DERR_INVALIDCALL when a direct copy run would be
 * re-pointed at a parent parameter of unsupported type. */
static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab,
        struct d3dx_parameter *param, unsigned int index)
{
    unsigned int i, start_index = index;
    DWORD *current_data;
    enum pres_reg_tables current_table;
    unsigned int current_start_offset, element_count;
    struct d3dx_const_param_eval_output *first_const;

    /* Guards the unsigned 'const_set_count - 1' below against wrapping. */
    if (!const_tab->const_set_count)
        return D3D_OK;

    while (index < const_tab->const_set_count - 1)
    {
        first_const = &const_tab->const_set[index];
        current_data = first_const->param->data;
        current_table = first_const->table;
        current_start_offset = get_offset_reg(current_table, first_const->register_index);
        element_count = 0;
        /* Extend the run while each entry continues exactly where the previous
         * one ended, both in the register table and in parameter data. */
        for (i = index; i < const_tab->const_set_count; ++i)
        {
            struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i];
            unsigned int count = get_offset_reg(const_set->table,
                    const_set->register_count * const_set->element_count);
            unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index);

            /* Entries that are not direct copies must additionally agree in
             * type, class, dimensions and register count; the element count is
             * not compared for the very last entry of the table. */
            if (!(const_set->table == current_table && current_start_offset == start_offset
                    && const_set->direct_copy == first_const->direct_copy
                    && current_data == const_set->param->data
                    && (const_set->direct_copy || (first_const->param->type == const_set->param->type
                    && first_const->param->class == const_set->param->class
                    && first_const->param->columns == const_set->param->columns
                    && first_const->param->rows == const_set->param->rows
                    && first_const->register_count == const_set->register_count
                    && (i == const_tab->const_set_count - 1
                    || first_const->param->element_count == const_set->param->element_count)))))
                break;

            current_start_offset += count;
            current_data += const_set->direct_copy ? count : const_set->param->rows
                    * const_set->param->columns * const_set->element_count;
            element_count += const_set->element_count;
        }

        /* Entries [index, i) form the run; merge only if it has more than one. */
        if (i > index + 1)
        {
            TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index,
                    debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy);

            first_const->element_count = element_count;
            if (first_const->direct_copy)
            {
                /* A direct copy run becomes one element spanning all the
                 * registers the run covered. */
                first_const->element_count = 1;
                /* Only a run starting at the first entry is re-pointed at the
                 * parent parameter, and never for a struct parent. */
                if (index == start_index
                        && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT))
                {
                    if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
                        return D3DERR_INVALIDCALL;
                    first_const->param = param;
                }
                first_const->register_count = get_reg_offset(current_table, current_start_offset)
                        - first_const->register_index;
            }
            /* Close the gap left by the merged entries. */
            memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i],
                    sizeof(*const_tab->const_set) * (const_tab->const_set_count - i));
            const_tab->const_set_count -= i - index - 1;
        }
        else
        {
            TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n",
                    const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy);
        }
        index = i;
    }
    return D3D_OK;
}
740 
/* Builds the const set entries needed to upload parameter 'param' to the
 * shader constant 'hc'.
 *
 * Arrays and structs are handled by recursing into each element / member and
 * then merging the resulting entries; a leaf parameter produces at most one
 * entry.
 *
 * const_tab - table receiving the entries; its regset2table mapping selects
 *             the register table.
 * ctab      - constant table 'hc' belongs to.
 * hc        - handle of the constant.
 * param     - effect parameter matched to the constant.
 *
 * Returns D3D_OK on success (including when there is nothing to upload),
 * D3DERR_INVALIDCALL when parameter and constant do not match or use
 * something unsupported, or the failure code of append_const_set(). */
static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
        D3DXHANDLE hc, struct d3dx_parameter *param)
{
    D3DXCONSTANT_DESC desc;
    unsigned int const_count, param_count, i;
    BOOL get_element;
    struct d3dx_const_param_eval_output const_set;
    struct const_upload_info info;
    enum pres_value_type table_type;
    HRESULT hr;

    if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL)))
        return D3DERR_INVALIDCALL;

    /* Children are array elements when the parameter is an array, struct
     * members otherwise. */
    if (param->element_count)
    {
        param_count = param->element_count;
        const_count = desc.Elements;
        get_element = TRUE;
    }
    else
    {
        if (desc.Elements > 1)
        {
            FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
            return D3DERR_INVALIDCALL;
        }
        param_count = param->member_count;
        const_count = desc.StructMembers;
        get_element = FALSE;
    }
    if (const_count != param_count)
    {
        FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
                param_count, const_count);
        return D3DERR_INVALIDCALL;
    }
    if (const_count)
    {
        HRESULT ret = D3D_OK;
        D3DXHANDLE hc_element;
        /* Entries appended by the children start here; used for merging below. */
        unsigned int index = const_tab->const_set_count;

        /* All children are processed even after a failure; the last failure
         * code is the one reported. */
        for (i = 0; i < const_count; ++i)
        {
            if (get_element)
                hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
            else
                hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
            if (!hc_element)
            {
                FIXME("Could not get constant.\n");
                hr = D3DERR_INVALIDCALL;
            }
            else
            {
                hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
            }
            if (FAILED(hr))
                ret = hr;
        }
        if (FAILED(ret))
            return ret;
        return merge_const_set_entries(const_tab, param, index);
    }

    /* Leaf parameter from here on. */
    TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
            debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
    TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
            debugstr_a(param->name), param->rows, param->columns, param->class,
            param->flags, param->bytes);

    const_set.element_count = 1;
    const_set.param = param;
    const_set.constant_class = desc.Class;
    if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
    {
        FIXME("Unknown register set %u.\n", desc.RegisterSet);
        return D3DERR_INVALIDCALL;
    }
    const_set.register_index = desc.RegisterIndex;
    const_set.table = const_tab->regset2table[desc.RegisterSet];
    if (const_set.table >= PRES_REGTAB_COUNT)
    {
        ERR("Unexpected register set %u.\n", desc.RegisterSet);
        return D3DERR_INVALIDCALL;
    }
    /* Both the table components and the parameter values are expected to be
     * exactly one unsigned int wide. */
    assert(table_info[const_set.table].component_size == sizeof(unsigned int));
    assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int));
    const_set.register_count = desc.RegisterCount;
    table_type = table_info[const_set.table].type;
    get_const_upload_info(&const_set, &info);
    if (!info.count)
    {
        TRACE("%s has zero count, skipping.\n", debugstr_a(param->name));
        return D3D_OK;
    }

    if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
        return D3DERR_INVALIDCALL;

    /* A direct copy is possible when no type conversion or transpose is
     * needed, the lines are laid out without padding, the data fills the
     * registers exactly and the parameter holds enough bytes. */
    const_set.direct_copy = table_type_from_param_type(param->type) == table_type
            && !info.transpose && info.minor == info.major_stride
            && info.count == get_offset_reg(const_set.table, const_set.register_count)
            && info.count * sizeof(unsigned int) <= param->bytes;
    if (info.minor_remainder && !const_set.direct_copy && !info.transpose)
        FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n",
                debugstr_a(param->name));

    /* The constant must not cover more lines than the parameter provides. */
    if (info.major_count > info.major
            || (info.major_count == info.major && info.minor_remainder))
    {
        WARN("Constant dimensions exceed parameter size.\n");
        return D3DERR_INVALIDCALL;
    }

    if (FAILED(hr = append_const_set(const_tab, &const_set)))
        return hr;

    return D3D_OK;
}
862 
863 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out,
864         struct d3dx9_base_effect *base, const char **skip_constants,
865         unsigned int skip_constants_count, struct d3dx_preshader *pres)
866 {
867     ID3DXConstantTable *ctab;
868     D3DXCONSTANT_DESC *cdesc;
869     struct d3dx_parameter **inputs_param;
870     D3DXCONSTANTTABLE_DESC desc;
871     HRESULT hr;
872     D3DXHANDLE hc;
873     unsigned int i, j;
874 
875     hr = D3DXGetShaderConstantTable(byte_code, &ctab);
876     if (FAILED(hr) || !ctab)
877     {
878         TRACE("Could not get CTAB data, hr %#x.\n", hr);
879         /* returning OK, shaders and preshaders without CTAB are valid */
880         return D3D_OK;
881     }
882     if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
883     {
884         FIXME("Could not get CTAB desc, hr %#x.\n", hr);
885         goto cleanup;
886     }
887 
888     out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
889     out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
890     if (!cdesc || !inputs_param)
891     {
892         hr = E_OUTOFMEMORY;
893         goto cleanup;
894     }
895 
896     for (i = 0; i < desc.Constants; ++i)
897     {
898         unsigned int index = out->input_count;
899         WORD constantinfo_reserved;
900 
901         hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
902         if (!hc)
903         {
904             FIXME("Null constant handle.\n");
905             goto cleanup;
906         }
907         if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved)))
908             goto cleanup;
909         inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name);
910         if (!inputs_param[index])
911         {
912             WARN("Could not find parameter %s in effect.\n", cdesc[index].Name);
913             continue;
914         }
915         if (cdesc[index].Class == D3DXPC_OBJECT)
916         {
917             TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]);
918             if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT
919                     || !is_param_type_sampler(inputs_param[index]->type))
920             {
921                 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name));
922                 hr = D3DERR_INVALIDCALL;
923                 goto cleanup;
924             }
925             if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount)
926             {
927                 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name));
928                 hr = D3DERR_INVALIDCALL;
929                 goto cleanup;
930             }
931         }
932         if (!is_top_level_parameter(inputs_param[index]))
933         {
934             WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name));
935             hr = E_FAIL;
936             goto cleanup;
937         }
938 
939         for (j = 0; j < skip_constants_count; ++j)
940         {
941             if (!strcmp(cdesc[index].Name, skip_constants[j]))
942             {
943                 if (!constantinfo_reserved)
944                 {
945                     WARN("skip_constants parameter %s is not register bound.\n",
946                             cdesc[index].Name);
947                     hr = D3DERR_INVALIDCALL;
948                     goto cleanup;
949                 }
950                 TRACE("Skipping constant %s.\n", cdesc[index].Name);
951                 break;
952             }
953         }
954         if (j < skip_constants_count)
955             continue;
956         ++out->input_count;
957         if (inputs_param[index]->class == D3DXPC_OBJECT)
958             continue;
959         if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index])))
960             goto cleanup;
961     }
962     if (pres)
963         append_pres_const_sets_for_shader_input(out, pres);
964     if (out->const_set_count)
965     {
966         struct d3dx_const_param_eval_output *new_alloc;
967 
968         qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set);
969 
970         i = 0;
971         while (i < out->const_set_count - 1)
972         {
973             if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD
974                     && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD
975                     && out->const_set[i].table == out->const_set[i + 1].table
976                     && out->const_set[i].register_index + out->const_set[i].register_count
977                     >= out->const_set[i + 1].register_index)
978             {
979                 assert(out->const_set[i].register_index + out->const_set[i].register_count
980                         <= out->const_set[i + 1].register_index + 1);
981                 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1
982                         - out->const_set[i].register_index;
983                 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i])
984                         * (out->const_set_count - i - 2));
985                 --out->const_set_count;
986             }
987             else
988             {
989                 ++i;
990             }
991         }
992 
993         new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set,
994                 sizeof(*out->const_set) * out->const_set_count);
995         if (new_alloc)
996         {
997             out->const_set = new_alloc;
998             out->const_set_size = out->const_set_count;
999         }
1000         else
1001         {
1002             WARN("Out of memory.\n");
1003         }
1004     }
1005 cleanup:
1006     ID3DXConstantTable_Release(ctab);
1007     return hr;
1008 }
1009 
1010 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
1011 {
1012     if (table < PRES_REGTAB_COUNT)
1013         table_sizes[table] = max(table_sizes[table], max_register + 1);
1014 }
1015 
1016 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
1017 {
1018     unsigned int i, table, max_register;
1019 
1020     for (i = 0; i < ctab->input_count; ++i)
1021     {
1022         if (!ctab->inputs[i].RegisterCount)
1023             continue;
1024         max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
1025         table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1026         update_table_size(table_sizes, table, max_register);
1027     }
1028 }
1029 
1030 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
1031 {
1032     static const char *xyzw_str = "xyzw";
1033     unsigned int i, table;
1034 
1035     table = arg->reg.table;
1036     if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT)
1037     {
1038         TRACE("(");
1039         for (i = 0; i < component_count; ++i)
1040             TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
1041                     ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]);
1042         TRACE(")");
1043     }
1044     else
1045     {
1046         if (arg->index_reg.table == PRES_REGTAB_COUNT)
1047         {
1048             TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset));
1049         }
1050         else
1051         {
1052             unsigned int index_reg;
1053 
1054             index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset);
1055             TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset),
1056                     table_symbol[arg->index_reg.table], index_reg,
1057                     xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]);
1058         }
1059         for (i = 0; i < component_count; ++i)
1060             TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]);
1061     }
1062 }
1063 
1064 static void dump_registers(struct d3dx_const_tab *ctab)
1065 {
1066     unsigned int table, i;
1067 
1068     for (i = 0; i < ctab->input_count; ++i)
1069     {
1070         table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1071         TRACE("//   %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
1072                 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
1073     }
1074 }
1075 
1076 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
1077 {
1078     unsigned int i;
1079 
1080     TRACE("%s ", pres_op_info[ins->op].mnem);
1081     dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
1082     for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
1083     {
1084         TRACE(", ");
1085         dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
1086     }
1087     TRACE("\n");
1088 }
1089 
1090 static void dump_preshader(struct d3dx_preshader *pres)
1091 {
1092     unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4;
1093     const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED];
1094 
1095     if (immediate_count)
1096         TRACE("// Immediates:\n");
1097     for (i = 0; i < immediate_count; ++i)
1098     {
1099         if (!(i % 4))
1100             TRACE("// ");
1101         TRACE("%.8e", immediates[i]);
1102         if (i % 4 == 3)
1103             TRACE("\n");
1104         else
1105             TRACE(", ");
1106     }
1107     TRACE("// Preshader registers:\n");
1108     dump_registers(&pres->inputs);
1109     TRACE("preshader\n");
1110     for (i = 0; i < pres->ins_count; ++i)
1111         dump_ins(&pres->regs, &pres->ins[i]);
1112 }
1113 
1114 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base)
1115 {
1116     unsigned int *p;
1117     unsigned int i, j, const_count;
1118     double *dconst;
1119     HRESULT hr;
1120     unsigned int saved_word;
1121     unsigned int section_size;
1122 
1123     TRACE("Preshader version %#x.\n", *ptr & 0xffff);
1124 
1125     if (!count)
1126     {
1127         WARN("Unexpected end of byte code buffer.\n");
1128         return D3DXERR_INVALIDDATA;
1129     }
1130 
1131     p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
1132     if (p)
1133     {
1134         const_count = *p++;
1135         if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
1136         {
1137             WARN("Byte code buffer ends unexpectedly.\n");
1138             return D3DXERR_INVALIDDATA;
1139         }
1140         dconst = (double *)p;
1141     }
1142     else
1143     {
1144         const_count = 0;
1145         dconst = NULL;
1146     }
1147     TRACE("%u double constants.\n", const_count);
1148 
1149     p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
1150     if (!p)
1151     {
1152         WARN("Could not find preshader code.\n");
1153         return D3D_OK;
1154     }
1155     pres->ins_count = *p++;
1156     --section_size;
1157     if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
1158     {
1159         WARN("Invalid instruction count %u.\n", pres->ins_count);
1160         return D3DXERR_INVALIDDATA;
1161     }
1162     TRACE("%u instructions.\n", pres->ins_count);
1163     pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
1164     if (!pres->ins)
1165         return E_OUTOFMEMORY;
1166     for (i = 0; i < pres->ins_count; ++i)
1167     {
1168         unsigned int *ptr_next;
1169 
1170         ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
1171         if (!ptr_next)
1172             return D3DXERR_INVALIDDATA;
1173         section_size -= ptr_next - p;
1174         p = ptr_next;
1175     }
1176 
1177     pres->inputs.regset2table = pres_regset2table;
1178 
1179     saved_word = *ptr;
1180     *ptr = 0xfffe0000;
1181     hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL);
1182     *ptr = saved_word;
1183     if (FAILED(hr))
1184         return hr;
1185 
1186     if (const_count % get_reg_components(PRES_REGTAB_IMMED))
1187     {
1188         FIXME("const_count %u is not a multiple of %u.\n", const_count,
1189                 get_reg_components(PRES_REGTAB_IMMED));
1190         return D3DXERR_INVALIDDATA;
1191     }
1192     pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count);
1193 
1194     update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
1195     for (i = 0; i < pres->ins_count; ++i)
1196     {
1197         for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
1198         {
1199             enum pres_reg_tables table;
1200             unsigned int reg_idx;
1201 
1202             if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT)
1203             {
1204                 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0
1205                         : pres->ins[i].component_count - 1;
1206 
1207                 table = pres->ins[i].inputs[j].reg.table;
1208                 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset
1209                         + last_component_index);
1210             }
1211             else
1212             {
1213                 table = pres->ins[i].inputs[j].index_reg.table;
1214                 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset);
1215             }
1216             if (reg_idx >= pres->regs.table_sizes[table])
1217             {
1218                 /* Native accepts these broken preshaders. */
1219                 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n",
1220                         i, j, table, reg_idx);
1221                 return D3DXERR_INVALIDDATA;
1222             }
1223         }
1224         update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table,
1225                 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset));
1226     }
1227     if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
1228         return E_OUTOFMEMORY;
1229     regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);
1230 
1231     return D3D_OK;
1232 }
1233 
1234 HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
1235         D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter,
1236         const char **skip_constants, unsigned int skip_constants_count)
1237 {
1238     struct d3dx_param_eval *peval;
1239     unsigned int *ptr, *shader_ptr = NULL;
1240     unsigned int i;
1241     BOOL shader;
1242     unsigned int count, pres_size;
1243     HRESULT ret;
1244 
1245     TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
1246             base_effect, byte_code, byte_code_size, type, peval_out);
1247 
1248     count = byte_code_size / sizeof(unsigned int);
1249     if (!byte_code || !count)
1250     {
1251         *peval_out = NULL;
1252         return D3D_OK;
1253     }
1254 
1255     peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
1256     if (!peval)
1257     {
1258         ret = E_OUTOFMEMORY;
1259         goto err_out;
1260     }
1261     peval->version_counter = version_counter;
1262 
1263     peval->param_type = type;
1264     switch (type)
1265     {
1266         case D3DXPT_VERTEXSHADER:
1267         case D3DXPT_PIXELSHADER:
1268             shader = TRUE;
1269             break;
1270         default:
1271             shader = FALSE;
1272             break;
1273     }
1274     peval->shader_inputs.regset2table = shad_regset2table;
1275 
1276     ptr = (unsigned int *)byte_code;
1277     if (shader)
1278     {
1279         if ((*ptr & 0xfffe0000) != 0xfffe0000)
1280         {
1281             FIXME("Invalid shader signature %#x.\n", *ptr);
1282             ret = D3DXERR_INVALIDDATA;
1283             goto err_out;
1284         }
1285         TRACE("Shader version %#x.\n", *ptr & 0xffff);
1286         shader_ptr = ptr;
1287         ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
1288         if (!ptr)
1289             TRACE("No preshader found.\n");
1290     }
1291     else
1292     {
1293         pres_size = count;
1294     }
1295 
1296     if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect)))
1297     {
1298         FIXME("Failed parsing preshader, byte code for analysis follows.\n");
1299         dump_bytecode(byte_code, byte_code_size);
1300         goto err_out;
1301     }
1302 
1303     if (shader)
1304     {
1305         if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect,
1306                 skip_constants, skip_constants_count, &peval->pres)))
1307         {
1308             TRACE("Could not get shader constant table, hr %#x.\n", ret);
1309             goto err_out;
1310         }
1311         update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
1312     }
1313 
1314     for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
1315     {
1316         if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i)))
1317             goto err_out;
1318     }
1319 
1320     if (TRACE_ON(d3dx))
1321     {
1322         dump_bytecode(byte_code, byte_code_size);
1323         dump_preshader(&peval->pres);
1324         if (shader)
1325         {
1326             TRACE("// Shader registers:\n");
1327             dump_registers(&peval->shader_inputs);
1328         }
1329     }
1330     *peval_out = peval;
1331     TRACE("Created parameter evaluator %p.\n", *peval_out);
1332     return D3D_OK;
1333 
1334 err_out:
1335     WARN("Error creating parameter evaluator.\n");
1336     if (TRACE_ON(d3dx))
1337         dump_bytecode(byte_code, byte_code_size);
1338 
1339     d3dx_free_param_eval(peval);
1340     *peval_out = NULL;
1341     return ret;
1342 }
1343 
1344 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
1345 {
1346     HeapFree(GetProcessHeap(), 0, ctab->inputs);
1347     HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
1348     HeapFree(GetProcessHeap(), 0, ctab->const_set);
1349 }
1350 
1351 static void d3dx_free_preshader(struct d3dx_preshader *pres)
1352 {
1353     HeapFree(GetProcessHeap(), 0, pres->ins);
1354 
1355     regstore_free_tables(&pres->regs);
1356     d3dx_free_const_tab(&pres->inputs);
1357 }
1358 
1359 void d3dx_free_param_eval(struct d3dx_param_eval *peval)
1360 {
1361     TRACE("peval %p.\n", peval);
1362 
1363     if (!peval)
1364         return;
1365 
1366     d3dx_free_preshader(&peval->pres);
1367     d3dx_free_const_tab(&peval->shader_inputs);
1368     HeapFree(GetProcessHeap(), 0, peval);
1369 }
1370 
/* Converts 'count' float values from 'in' to int values stored in 'out'. */
static void pres_int_from_float(void *out, const void *in, unsigned int count)
{
    const float *src = in;
    int *dst = out;

    while (count--)
        *dst++ = *src++;
}
1380 
1381 static void pres_bool_from_value(void *out, const void *in, unsigned int count)
1382 {
1383     unsigned int i;
1384     const DWORD *in_dword = in;
1385     BOOL *out_bool = out;
1386 
1387     for (i = 0; i < count; ++i)
1388         out_bool[i] = !!in_dword[i];
1389 }
1390 
/* Converts 'count' int values from 'in' to float values stored in 'out'. */
static void pres_float_from_int(void *out, const void *in, unsigned int count)
{
    const int *src = in;
    float *dst = out;

    while (count--)
        *dst++ = *src++;
}
1400 
1401 static void pres_float_from_bool(void *out, const void *in, unsigned int count)
1402 {
1403     unsigned int i;
1404     const BOOL *in_bool = in;
1405     float *out_float = out;
1406 
1407     for (i = 0; i < count; ++i)
1408         out_float[i] = !!in_bool[i];
1409 }
1410 
/* Converts 'count' BOOL values from 'in' to int values in 'out': 1 for any non-zero
 * input, 0 otherwise.
 *
 * The input is read as int (BOOL is a typedef of int in the Windows headers) and not
 * as float: interpreted as a float the non-zero bit pattern 0x80000000 is negative
 * zero and would be converted to 0. */
static void pres_int_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const int *in_bool = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = !!in_bool[i];
}
1420 
1421 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table,
1422         unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type)
1423 {
1424     typedef void (*conv_func)(void *out, const void *in, unsigned int count);
1425     static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] =
1426     {
1427         {NULL,                 NULL, pres_int_from_float, pres_bool_from_value},
1428         {NULL,                 NULL, NULL,                NULL},
1429         {pres_float_from_int,  NULL, NULL,                pres_bool_from_value},
1430         {pres_float_from_bool, NULL, pres_int_from_bool,  NULL}
1431     };
1432     enum pres_value_type table_type = table_info[table].type;
1433 
1434     if (param_type == table_type)
1435     {
1436         regstore_set_values(rs, table, in, offset, count);
1437         return;
1438     }
1439 
1440     set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count);
1441 }
1442 
1443 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1444         D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr,
1445         unsigned int start, unsigned int count)
1446 {
1447     if (type == D3DXPT_VERTEXSHADER)
1448     {
1449         switch(table)
1450         {
1451             case PRES_REGTAB_OCONST:
1452                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count);
1453             case PRES_REGTAB_OICONST:
1454                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count);
1455             case PRES_REGTAB_OBCONST:
1456                 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count);
1457             default:
1458                 FIXME("Unexpected register table %u.\n", table);
1459                 return D3DERR_INVALIDCALL;
1460         }
1461     }
1462     else if (type == D3DXPT_PIXELSHADER)
1463     {
1464         switch(table)
1465         {
1466             case PRES_REGTAB_OCONST:
1467                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count);
1468             case PRES_REGTAB_OICONST:
1469                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count);
1470             case PRES_REGTAB_OBCONST:
1471                 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count);
1472             default:
1473                 FIXME("Unexpected register table %u.\n", table);
1474                 return D3DERR_INVALIDCALL;
1475         }
1476     }
1477     else
1478     {
1479         FIXME("Unexpected parameter type %u.\n", type);
1480         return D3DERR_INVALIDCALL;
1481     }
1482 }
1483 
1484 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
1485         ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1486         D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty)
1487 {
1488     unsigned int const_idx;
1489     unsigned int current_start = 0, current_count = 0;
1490     enum pres_reg_tables current_table = PRES_REGTAB_COUNT;
1491     BOOL update_device = manager || device;
1492     HRESULT hr, result = D3D_OK;
1493     ULONG64 update_version = const_tab->update_version;
1494 
1495     for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1496     {
1497         struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1498         enum pres_reg_tables table = const_set->table;
1499         struct d3dx_parameter *param = const_set->param;
1500         unsigned int element, i, j, start_offset;
1501         struct const_upload_info info;
1502         unsigned int *data;
1503         enum pres_value_type param_type;
1504 
1505         if (!(param && is_param_dirty(param, update_version)))
1506             continue;
1507 
1508         data = param->data;
1509         start_offset = get_offset_reg(table, const_set->register_index);
1510         if (const_set->direct_copy)
1511         {
1512             regstore_set_values(rs, table, data, start_offset,
1513                     get_offset_reg(table, const_set->register_count));
1514             continue;
1515         }
1516         param_type = table_type_from_param_type(param->type);
1517         if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR)
1518         {
1519             unsigned int count = max(param->rows, param->columns);
1520 
1521             if (count >= get_reg_components(table))
1522             {
1523                 regstore_set_data(rs, table, start_offset, data,
1524                         count * const_set->element_count, param_type);
1525             }
1526             else
1527             {
1528                 for (element = 0; element < const_set->element_count; ++element)
1529                     regstore_set_data(rs, table, start_offset + get_offset_reg(table, element),
1530                             &data[element * count], count, param_type);
1531             }
1532             continue;
1533         }
1534         get_const_upload_info(const_set, &info);
1535         for (element = 0; element < const_set->element_count; ++element)
1536         {
1537             unsigned int *out = (unsigned int *)rs->tables[table] + start_offset;
1538 
1539             /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer.
1540              * All the supported types of parameters and table values have the same size. */
1541             if (info.transpose)
1542             {
1543                 for (i = 0; i < info.major_count; ++i)
1544                     for (j = 0; j < info.minor; ++j)
1545                         out[i * info.major_stride + j] = data[i + j * info.major];
1546 
1547                 for (j = 0; j < info.minor_remainder; ++j)
1548                     out[i * info.major_stride + j] = data[i + j * info.major];
1549             }
1550             else
1551             {
1552                 for (i = 0; i < info.major_count; ++i)
1553                     for (j = 0; j < info.minor; ++j)
1554                         out[i * info.major_stride + j] = data[i * info.minor + j];
1555             }
1556             start_offset += get_offset_reg(table, const_set->register_count);
1557             data += param->rows * param->columns;
1558         }
1559         start_offset = get_offset_reg(table, const_set->register_index);
1560         if (table_info[table].type != param_type)
1561             regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset,
1562                     get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type);
1563     }
1564     const_tab->update_version = new_update_version;
1565     if (!update_device)
1566         return D3D_OK;
1567 
1568     for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1569     {
1570         struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1571 
1572         if (device_update_all || (const_set->param
1573                 ? is_param_dirty(const_set->param, update_version) : pres_dirty))
1574         {
1575             enum pres_reg_tables table = const_set->table;
1576 
1577             if (table == current_table && current_start + current_count == const_set->register_index)
1578             {
1579                 current_count += const_set->register_count * const_set->element_count;
1580             }
1581             else
1582             {
1583                 if (current_count)
1584                 {
1585                     if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1586                             (DWORD *)rs->tables[current_table]
1587                             + get_offset_reg(current_table, current_start), current_start, current_count)))
1588                         result = hr;
1589                 }
1590                 current_table = table;
1591                 current_start = const_set->register_index;
1592                 current_count = const_set->register_count * const_set->element_count;
1593             }
1594         }
1595     }
1596     if (current_count)
1597     {
1598         if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1599                 (DWORD *)rs->tables[current_table]
1600                 + get_offset_reg(current_table, current_start), current_start, current_count)))
1601             result = hr;
1602     }
1603     return result;
1604 }
1605 
1606 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset)
1607 {
1608     return regstore_get_double(rs, table, offset);
1609 }
1610 
1611 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp)
1612 {
1613     unsigned int offset, base_index, reg_index, table;
1614 
1615     table = opr->reg.table;
1616 
1617     if (opr->index_reg.table == PRES_REGTAB_COUNT)
1618         base_index = 0;
1619     else
1620         base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset));
1621 
1622     offset = get_offset_reg(table, base_index) + opr->reg.offset + comp;
1623     reg_index = get_reg_offset(table, offset);
1624 
1625     if (reg_index >= rs->table_sizes[table])
1626     {
1627         unsigned int wrap_size;
1628 
1629         if (table == PRES_REGTAB_CONST)
1630         {
1631             /* As it can be guessed from tests, offset into floating constant table is wrapped
1632              * to the nearest power of 2 and not to the actual table size. */
1633             for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1)
1634                 ;
1635         }
1636         else
1637         {
1638             wrap_size = rs->table_sizes[table];
1639         }
1640         WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n",
1641                 reg_index, table, wrap_size, rs->table_sizes[table]);
1642         reg_index %= wrap_size;
1643 
1644         if (reg_index >= rs->table_sizes[table])
1645             return 0.0;
1646 
1647         offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table);
1648     }
1649 
1650     return exec_get_reg_value(rs, table, offset);
1651 }
1652 
1653 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg,
1654         unsigned int comp, double res)
1655 {
1656     regstore_set_double(rs, reg->table, reg->offset + comp, res);
1657 }
1658 
1659 #define ARGS_ARRAY_SIZE 8
1660 static HRESULT execute_preshader(struct d3dx_preshader *pres)
1661 {
1662     unsigned int i, j, k;
1663     double args[ARGS_ARRAY_SIZE];
1664     double res;
1665 
1666     for (i = 0; i < pres->ins_count; ++i)
1667     {
1668         const struct d3dx_pres_ins *ins;
1669         const struct op_info *oi;
1670 
1671         ins = &pres->ins[i];
1672         oi = &pres_op_info[ins->op];
1673         if (oi->func_all_comps)
1674         {
1675             if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
1676             {
1677                 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
1678                 return E_FAIL;
1679             }
1680             for (k = 0; k < oi->input_count; ++k)
1681                 for (j = 0; j < ins->component_count; ++j)
1682                     args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k],
1683                             ins->scalar_op && !k ? 0 : j);
1684             res = oi->func(args, ins->component_count);
1685 
1686             /* only 'dot' instruction currently falls here */
1687             exec_set_arg(&pres->regs, &ins->output.reg, 0, res);
1688         }
1689         else
1690         {
1691             for (j = 0; j < ins->component_count; ++j)
1692             {
1693                 for (k = 0; k < oi->input_count; ++k)
1694                     args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j);
1695                 res = oi->func(args, ins->component_count);
1696                 exec_set_arg(&pres->regs, &ins->output.reg, j, res);
1697             }
1698         }
1699     }
1700     return D3D_OK;
1701 }
1702 
1703 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version)
1704 {
1705     unsigned int i;
1706 
1707     if (update_version == ULONG64_MAX)
1708         update_version = ctab->update_version;
1709     for (i = 0; i < ctab->input_count; ++i)
1710     {
1711         if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]),
1712                 update_version))
1713             return TRUE;
1714     }
1715     return FALSE;
1716 }
1717 
1718 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version)
1719 {
1720     return is_const_tab_input_dirty(&peval->pres.inputs, update_version)
1721             || is_const_tab_input_dirty(&peval->shader_inputs, update_version);
1722 }
1723 
1724 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param,
1725         void *param_value)
1726 {
1727     HRESULT hr;
1728     unsigned int i;
1729     unsigned int elements, elements_param, elements_table;
1730     float *oc;
1731 
1732     TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
1733 
1734     if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX))
1735     {
1736         set_constants(&peval->pres.regs, &peval->pres.inputs,
1737                 next_update_version(peval->version_counter),
1738                 NULL, NULL, peval->param_type, FALSE, FALSE);
1739 
1740         if (FAILED(hr = execute_preshader(&peval->pres)))
1741             return hr;
1742     }
1743 
1744     elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]);
1745     elements_param = param->bytes / sizeof(unsigned int);
1746     elements = min(elements_table, elements_param);
1747     oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
1748     for (i = 0; i < elements; ++i)
1749         set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
1750     return D3D_OK;
1751 }
1752 
1753 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1754         struct d3dx_param_eval *peval, BOOL update_all)
1755 {
1756     HRESULT hr;
1757     struct d3dx_preshader *pres = &peval->pres;
1758     struct d3dx_regstore *rs = &pres->regs;
1759     ULONG64 new_update_version = next_update_version(peval->version_counter);
1760     BOOL pres_dirty = FALSE;
1761 
1762     TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
1763 
1764     if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX))
1765     {
1766         set_constants(rs, &pres->inputs, new_update_version,
1767                 NULL, NULL, peval->param_type, FALSE, FALSE);
1768         if (FAILED(hr = execute_preshader(pres)))
1769             return hr;
1770         pres_dirty = TRUE;
1771     }
1772 
1773     return set_constants(rs, &peval->shader_inputs, new_update_version,
1774             manager, device, peval->param_type, update_all, pres_dirty);
1775 }
1776