xref: /reactos/dll/directx/wine/d3dx9_36/preshader.c (revision d09998df)
1  /*
2   * Copyright 2016 Paul Gofman
3   *
4   * This library is free software; you can redistribute it and/or
5   * modify it under the terms of the GNU Lesser General Public
6   * License as published by the Free Software Foundation; either
7   * version 2.1 of the License, or (at your option) any later version.
8   *
9   * This library is distributed in the hope that it will be useful,
10   * but WITHOUT ANY WARRANTY; without even the implied warranty of
11   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   * Lesser General Public License for more details.
13   *
14   * You should have received a copy of the GNU Lesser General Public
15   * License along with this library; if not, write to the Free Software
16   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17   */
18  
19  #include "d3dx9_36_private.h"
20  
21  #include <assert.h>
22  
23  /* ReactOS FIXME: Insect - fmin()/fmax() are mapped onto min/max by the two defines below. */
24  #define fmin min
25  #define fmax max
26  
/* Preshader operations. parse_pres_ins() stores the index of the matching
 * pres_op_info[] entry in an enum pres_ops field, and the per-entry comments
 * in pres_op_info[] name these values in the same order, so the two lists
 * must be kept in sync. */
27  enum pres_ops
28  {
29      PRESHADER_OP_NOP,
30      PRESHADER_OP_MOV,
31      PRESHADER_OP_NEG,
32      PRESHADER_OP_RCP,
33      PRESHADER_OP_FRC,
34      PRESHADER_OP_EXP,
35      PRESHADER_OP_LOG,
36      PRESHADER_OP_RSQ,
37      PRESHADER_OP_SIN,
38      PRESHADER_OP_COS,
39      PRESHADER_OP_ASIN,
40      PRESHADER_OP_ACOS,
41      PRESHADER_OP_ATAN,
42      PRESHADER_OP_MIN,
43      PRESHADER_OP_MAX,
44      PRESHADER_OP_LT,
45      PRESHADER_OP_GE,
46      PRESHADER_OP_ADD,
47      PRESHADER_OP_MUL,
48      PRESHADER_OP_ATAN2,
49      PRESHADER_OP_DIV,
50      PRESHADER_OP_CMP,
51      PRESHADER_OP_DOT,
52      PRESHADER_OP_DOTSWIZ6,
53      PRESHADER_OP_DOTSWIZ8,
54  };
55  
/* Signature shared by all pres_* evaluators: computes one result from the
 * argument array args. n is an extra count; of the evaluators in this file
 * only pres_dot() reads it (as the length of each of its two vectors). */
56  typedef double (*pres_op_func)(double *args, int n);
57  
58  static double to_signed_nan(double v)
59  {
60      static const union
61      {
62          ULONG64 ulong64_value;
63          double double_value;
64      }
65      signed_nan =
66      {
67          0xfff8000000000000
68      };
69  
70      return isnan(v) ? signed_nan.double_value : v;
71  }
72  
/* mov: returns the first argument unchanged; n is not used. */
static double pres_mov(double *args, int n)
{
    return args[0];
}
/* add: returns args[0] + args[1]; n is not used. */
static double pres_add(double *args, int n)
{
    const double lhs = args[0], rhs = args[1];

    return lhs + rhs;
}
/* mul: returns args[0] * args[1]; n is not used. */
static double pres_mul(double *args, int n)
{
    const double lhs = args[0], rhs = args[1];

    return lhs * rhs;
}
/* dot: treats args as two consecutive vectors of n components each
 * (args[0..n-1] and args[n..2n-1]) and returns their dot product.
 * Products are accumulated in index order starting from 0.0. */
static double pres_dot(double *args, int n)
{
    const double *first = args;
    const double *second = args + n;
    double acc = 0.0;
    int k;

    for (k = 0; k < n; ++k)
        acc += first[k] * second[k];

    return acc;
}
86  
/* d3ds_dotswiz with 6 inputs: dot product of args[0..2] with args[3..5].
 * The n argument is ignored; the vector length is fixed at 3. */
static double pres_dotswiz6(double *args, int n)
{
    const int vector_size = 3;

    return pres_dot(args, vector_size);
}
91  
/* d3ds_dotswiz with 8 inputs: dot product of args[0..3] with args[4..7].
 * The n argument is ignored; the vector length is fixed at 4. */
static double pres_dotswiz8(double *args, int n)
{
    const int vector_size = 4;

    return pres_dot(args, vector_size);
}
96  
/* neg: returns the negated first argument; n is not used. */
static double pres_neg(double *args, int n)
{
    const double value = args[0];

    return -value;
}
/* rcp: returns the reciprocal 1 / args[0]; n is not used. */
static double pres_rcp(double *args, int n)
{
    const double denominator = args[0];

    return 1.0 / denominator;
}
/* lt: returns 1.0 when args[0] < args[1], otherwise 0.0; n is not used. */
static double pres_lt(double *args, int n)
{
    if (args[0] < args[1])
        return 1.0;
    return 0.0;
}
/* ge: returns 1.0 when args[0] >= args[1], otherwise 0.0; n is not used. */
static double pres_ge(double *args, int n)
{
    if (args[0] >= args[1])
        return 1.0;
    return 0.0;
}
101  static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);}
102  static double pres_min(double *args, int n) {return fmin(args[0], args[1]);}
103  static double pres_max(double *args, int n) {return fmax(args[0], args[1]);}
/* cmp: selects args[1] when args[0] >= 0.0, otherwise args[2];
 * n is not used. */
static double pres_cmp(double *args, int n)
{
    if (args[0] >= 0.0)
        return args[1];
    return args[2];
}
105  static double pres_sin(double *args, int n) {return sin(args[0]);}
106  static double pres_cos(double *args, int n) {return cos(args[0]);}
107  static double pres_rsq(double *args, int n)
108  {
109      double v;
110  
111      v = fabs(args[0]);
112      if (v == 0.0)
113          return INFINITY;
114      else
115          return 1.0 / sqrt(v);
116  }
117  static double pres_exp(double *args, int n) {return pow(2.0, args[0]);}
118  static double pres_log(double *args, int n)
119  {
120      double v;
121  
122      v = fabs(args[0]);
123      if (v == 0.0)
124          return 0.0;
125      else
126  #ifdef HAVE_LOG2
127          return log2(v);
128  #else
129          return log(v) / log(2);
130  #endif
131  }
/* asin: returns asin(args[0]) with any NaN result passed through
 * to_signed_nan(); n is not used. */
static double pres_asin(double *args, int n)
{
    const double result = asin(args[0]);

    return to_signed_nan(result);
}
/* acos: returns acos(args[0]) with any NaN result passed through
 * to_signed_nan(); n is not used. */
static double pres_acos(double *args, int n)
{
    const double result = acos(args[0]);

    return to_signed_nan(result);
}
134  static double pres_atan(double *args, int n) {return atan(args[0]);}
135  static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);}
136  
/* div: deliberately ignores its arguments and returns 0.
 * According to the test results the 'div' operation always returns 0. The
 * compiler does not seem to ever generate it, using rcp + mul instead, so
 * it is probably not implemented in native d3dx. */
static double pres_div(double *args, int n)
{
    return 0.0;
}
140  
141  #define PRES_OPCODE_MASK 0x7ff00000
142  #define PRES_OPCODE_SHIFT 20
143  #define PRES_SCALAR_FLAG 0x80000000
144  #define PRES_NCOMP_MASK  0x0000ffff
145  
146  #define FOURCC_PRES 0x53455250
147  #define FOURCC_CLIT 0x54494c43
148  #define FOURCC_FXLC 0x434c5846
149  #define FOURCC_PRSI 0x49535250
150  #define PRES_SIGN 0x46580000
151  
/* Description of one preshader operation.
 *   opcode         - value compared with the opcode bits of the raw
 *                    instruction word in parse_pres_ins()
 *   mnem           - mnemonic, used in diagnostic messages
 *   input_count    - number of input operands; matched together with opcode
 *   func_all_comps - when set, parse_pres_ins() treats the instruction as
 *                    writing a single output component regardless of
 *                    component_count (presumably func consumes all input
 *                    components in one call - confirm against the executor)
 *   func           - evaluator; NULL for nop */
152  struct op_info
153  {
154      unsigned int opcode;
155      char mnem[16];
156      unsigned int input_count;
157      BOOL func_all_comps;
158      pres_op_func func;
159  };
160  
/* Operation table. The entry order follows enum pres_ops (see the comment on
 * each entry); parse_pres_ins() stores the index of the matching entry as the
 * instruction's op. Entries are looked up by the (opcode, input_count) pair,
 * which is why opcode 0x70e appears twice with different input counts. */
161  static const struct op_info pres_op_info[] =
162  {
163      {0x000, "nop", 0, 0, NULL    }, /* PRESHADER_OP_NOP */
164      {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */
165      {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */
166      {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */
167      {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */
168      {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */
169      {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */
170      {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */
171      {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */
172      {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */
173      {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */
174      {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */
175      {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */
176      {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */
177      {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */
178      {0x202, "lt",  2, 0, pres_lt }, /* PRESHADER_OP_LT  */
179      {0x203, "ge",  2, 0, pres_ge }, /* PRESHADER_OP_GE  */
180      {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */
181      {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */
182      {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */
183      {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */
184      {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */
185      {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */
186      {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */
187      {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */
188  };
189  
/* Storage type of a register table component. PRES_VT_COUNT is not a real
 * type: table_type_from_param_type() returns it for unsupported parameter
 * types. */
190  enum pres_value_type
191  {
192      PRES_VT_FLOAT,
193      PRES_VT_DOUBLE,
194      PRES_VT_INT,
195      PRES_VT_BOOL,
196      PRES_VT_COUNT
197  };
198  
/* Per register table storage layout: size in bytes of one component and the
 * type used to store it. Indexed by register table; the comment on each
 * entry names the table it describes. */
199  static const struct
200  {
201      unsigned int component_size;
202      enum pres_value_type type;
203  }
204  table_info[] =
205  {
206      {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */
207      {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */
208      {sizeof(float),  PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */
209      {sizeof(BOOL),   PRES_VT_BOOL  }, /* PRES_REGTAB_OBCONST */
210      {sizeof(int),    PRES_VT_INT,  }, /* PRES_REGTAB_OICONST */
211      /* TODO: use double precision for 64 bit */
212      {sizeof(float),  PRES_VT_FLOAT }  /* PRES_REGTAB_TEMP */
213  };
214  
/* Short textual names of the register tables.
 * NOTE(review): presumably indexed by enum pres_reg_tables in the same order
 * as table_info[], with "(null)" standing for PRES_REGTAB_COUNT - the users
 * of this array are not visible here, confirm. */
215  static const char *table_symbol[] =
216  {
217      "imm", "c", "oc", "ob", "oi", "r", "(null)",
218  };
219  
/* Maps a D3DXRS_* register set (see the entry comments) to a register table.
 * Float constants map to PRES_REGTAB_CONST here. PRES_REGTAB_COUNT marks a
 * register set without a table (samplers); init_set_constants_param()
 * rejects constants that resolve to it.
 * NOTE(review): judging by the name this is the mapping used for preshader
 * inputs - confirm at the place where regset2table is assigned. */
220  static const enum pres_reg_tables pres_regset2table[] =
221  {
222      PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
223      PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
224      PRES_REGTAB_CONST,    /* D3DXRS_FLOAT4 */
225      PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
226  };
227  
/* Maps a D3DXRS_* register set (see the entry comments) to a register table.
 * Float constants map to PRES_REGTAB_OCONST here. PRES_REGTAB_COUNT marks a
 * register set without a table (samplers); init_set_constants_param()
 * rejects constants that resolve to it.
 * NOTE(review): judging by the name this is the mapping used for shader
 * constants - confirm at the place where regset2table is assigned. */
228  static const enum pres_reg_tables shad_regset2table[] =
229  {
230      PRES_REGTAB_OBCONST,  /* D3DXRS_BOOL */
231      PRES_REGTAB_OICONST,  /* D3DXRS_INT4 */
232      PRES_REGTAB_OCONST,   /* D3DXRS_FLOAT4 */
233      PRES_REGTAB_COUNT,     /* D3DXRS_SAMPLER */
234  };
235  
/* Reference to a single component of a register: the register table it lives
 * in and the component offset inside that table. */
236  struct d3dx_pres_reg
237  {
238      enum pres_reg_tables table;
239      /* offset is component index, not register index, e. g.
240         offset for component c3.y is 13 (3 * 4 + 1) */
241      unsigned int offset;
242  };
243  
/* One instruction operand. reg is the register being addressed; index_reg is
 * the register used for relative addressing. parse_pres_arg() sets
 * index_reg.table to PRES_REGTAB_COUNT when the operand has no relative
 * addressing. */
244  struct d3dx_pres_operand
245  {
246      struct d3dx_pres_reg reg;
247      struct d3dx_pres_reg index_reg;
248  };
249  
250  #define MAX_INPUTS_COUNT 8
251  
/* One parsed preshader instruction, as filled in by parse_pres_ins():
 * op is the index of the matching pres_op_info[] entry, component_count is
 * taken from the low bits of the raw instruction word (1..4 are accepted),
 * followed by the input operands and the single output operand. */
252  struct d3dx_pres_ins
253  {
254      enum pres_ops op;
255      /* first input argument is scalar,
256         scalar component is propagated */
257      BOOL scalar_op;
258      unsigned int component_count;
259      struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT];
260      struct d3dx_pres_operand output;
261  };
262  
/* Layout information computed by get_const_upload_info() for uploading one
 * parameter into constant registers.
 *   transpose       - constant and parameter disagree on row/column major
 *   major, minor    - parameter dimensions along the constant's major and
 *                     minor axes (columns/rows for a column-major constant,
 *                     rows/columns otherwise)
 *   major_stride    - components between consecutive major entries: minor
 *                     for tables with one component per register, otherwise
 *                     the number of components per register
 *   major_count     - number of complete major entries covered by the
 *                     constant's registers
 *   count           - major_count * minor + minor_remainder
 *   minor_remainder - components left over in an incomplete last major entry */
263  struct const_upload_info
264  {
265      BOOL transpose;
266      unsigned int major, minor;
267      unsigned int major_stride;
268      unsigned int major_count;
269      unsigned int count;
270      unsigned int minor_remainder;
271  };
272  
273  static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type)
274  {
275      switch (type)
276      {
277          case D3DXPT_FLOAT:
278              return PRES_VT_FLOAT;
279          case D3DXPT_INT:
280              return PRES_VT_INT;
281          case D3DXPT_BOOL:
282              return PRES_VT_BOOL;
283          default:
284              FIXME("Unsupported type %u.\n", type);
285              return PRES_VT_COUNT;
286      }
287  }
288  
289  static unsigned int get_reg_offset(unsigned int table, unsigned int offset)
290  {
291      return table == PRES_REGTAB_OBCONST ? offset : offset >> 2;
292  }
293  
294  static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx)
295  {
296      return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2;
297  }
298  
299  static unsigned int get_reg_components(unsigned int table)
300  {
301      return get_offset_reg(table, 1);
302  }
303  
304  #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8)
305  
306  static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table)
307  {
308      unsigned int size;
309  
310      size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size;
311      if (size)
312      {
313          rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
314          if (!rs->tables[table])
315              return E_OUTOFMEMORY;
316      }
317      return D3D_OK;
318  }
319  
320  static void regstore_free_tables(struct d3dx_regstore *rs)
321  {
322      unsigned int i;
323  
324      for (i = 0; i < PRES_REGTAB_COUNT; ++i)
325      {
326          HeapFree(GetProcessHeap(), 0, rs->tables[i]);
327      }
328  }
329  
330  static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data,
331          unsigned int start_offset, unsigned int count)
332  {
333      BYTE *dst = rs->tables[table];
334      const BYTE *src = data;
335      unsigned int size;
336  
337      dst += start_offset * table_info[table].component_size;
338      size = count * table_info[table].component_size;
339      assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst));
340      memcpy(dst, src, size);
341  }
342  
343  static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset)
344  {
345      BYTE *p;
346  
347      p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
348      switch (table_info[table].type)
349      {
350          case PRES_VT_FLOAT:
351              return *(float *)p;
352          case PRES_VT_DOUBLE:
353              return *(double *)p;
354          default:
355              FIXME("Unexpected preshader input from table %u.\n", table);
356              return NAN;
357      }
358  }
359  
360  static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v)
361  {
362      BYTE *p;
363  
364      p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset;
365      switch (table_info[table].type)
366      {
367          case PRES_VT_FLOAT : *(float *)p = v; break;
368          case PRES_VT_DOUBLE: *(double *)p = v; break;
369          case PRES_VT_INT   : *(int *)p = lrint(v); break;
370          case PRES_VT_BOOL  : *(BOOL *)p = !!v; break;
371          default:
372              FIXME("Bad type %u.\n", table_info[table].type);
373              break;
374      }
375  }
376  
/* Traces a byte code buffer as 32 bit hexadecimal words, at most eight words
 * per output line. size is given in bytes; a trailing partial word is not
 * printed. */
static void dump_bytecode(void *data, unsigned int size)
{
    unsigned int *words = (unsigned int *)data;
    const unsigned int word_count = size / sizeof(*words);
    unsigned int row_start, col;

    for (row_start = 0; row_start < word_count; row_start += 8)
    {
        const unsigned int row_len = min(word_count - row_start, 8);

        for (col = 0; col < row_len; ++col)
            TRACE("0x%08x,", words[row_start + col]);
        TRACE("\n");
    }
}
393  
/* Walks consecutive comment sections in a byte code buffer looking for the
 * one tagged with fourcc.
 *
 * ptr    - first word to inspect
 * count  - number of words available at ptr
 * fourcc - tag expected in the word following the comment header
 * size   - receives the section size taken from the comment header (upper
 *          16 bits of the header word); only written on success
 *
 * A comment header is a word whose low 16 bits are 0xfffe. The search stops
 * at the first word that is not a comment header, at an empty section, or
 * when a section would extend past the buffer.
 *
 * Returns a pointer to the word following the fourcc, or NULL if no matching
 * section was found. More than two words must be available, so the returned
 * pointer always refers to a word inside the buffer. */
static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count,
        unsigned int fourcc, unsigned int *size)
{
    for (;;)
    {
        unsigned int header, section_size;

        if (count <= 2)
            return NULL;

        header = ptr[0];
        if ((header & 0xffff) != 0xfffe)
            return NULL;

        section_size = header >> 16;
        if (!section_size || section_size + 1 > count)
            return NULL;

        if (ptr[1] == fourcc)
        {
            *size = section_size;
            return ptr + 2;
        }

        /* skip the header word and the section body */
        count -= section_size + 1;
        ptr += section_size + 1;
    }
}
415  
416  static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg)
417  {
418      static const enum pres_reg_tables reg_table[8] =
419      {
420          PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT,
421          PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP
422      };
423  
424      if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT)
425      {
426          FIXME("Unsupported register table %#x.\n", *ptr);
427          return NULL;
428      }
429  
430      reg->table = reg_table[*ptr++];
431      reg->offset = *ptr++;
432      return ptr;
433  }
434  
435  static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr)
436  {
437      if (count < 3 || (*ptr && count < 5))
438      {
439          WARN("Byte code buffer ends unexpectedly, count %u.\n", count);
440          return NULL;
441      }
442  
443      if (*ptr)
444      {
445          if (*ptr != 1)
446          {
447              FIXME("Unknown relative addressing flag, word %#x.\n", *ptr);
448              return NULL;
449          }
450          ptr = parse_pres_reg(ptr + 1, &opr->index_reg);
451          if (!ptr)
452              return NULL;
453      }
454      else
455      {
456          opr->index_reg.table = PRES_REGTAB_COUNT;
457          ++ptr;
458      }
459  
460      ptr = parse_pres_reg(ptr, &opr->reg);
461  
462      if (opr->reg.table == PRES_REGTAB_OBCONST)
463          opr->reg.offset /= 4;
464      return ptr;
465  }
466  
467  static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins)
468  {
469      unsigned int ins_code, ins_raw;
470      unsigned int input_count;
471      unsigned int i;
472  
473      if (count < 2)
474      {
475          WARN("Byte code buffer ends unexpectedly.\n");
476          return NULL;
477      }
478  
479      ins_raw = *ptr++;
480      ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT;
481      ins->component_count = ins_raw & PRES_NCOMP_MASK;
482      ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG);
483  
484      if (ins->component_count < 1 || ins->component_count > 4)
485      {
486          FIXME("Unsupported number of components %u.\n", ins->component_count);
487          return NULL;
488      }
489      input_count = *ptr++;
490      count -= 2;
491      for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i)
492          if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count)
493              break;
494      if (i == ARRAY_SIZE(pres_op_info))
495      {
496          FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw);
497          return NULL;
498      }
499      ins->op = i;
500      if (input_count > ARRAY_SIZE(ins->inputs))
501      {
502          FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count,
503                  pres_op_info[i].mnem);
504          return NULL;
505      }
506      for (i = 0; i < input_count; ++i)
507      {
508          unsigned int *p;
509  
510          p = parse_pres_arg(ptr, count, &ins->inputs[i]);
511          if (!p)
512              return NULL;
513          count -= p - ptr;
514          ptr = p;
515      }
516      ptr = parse_pres_arg(ptr, count, &ins->output);
517      if (ins->output.index_reg.table != PRES_REGTAB_COUNT)
518      {
519          FIXME("Relative addressing in output register not supported.\n");
520          return NULL;
521      }
522      if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset
523              + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1))
524              != get_reg_offset(ins->output.reg.table, ins->output.reg.offset))
525      {
526          FIXME("Instructions outputting multiple registers are not supported.\n");
527          return NULL;
528      }
529      return ptr;
530  }
531  
532  static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc,
533          WORD *constantinfo_reserved)
534  {
535      const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc);
536  
537      if (!constant)
538      {
539          FIXME("Could not get constant desc.\n");
540          return D3DERR_INVALIDCALL;
541      }
542      *desc = constant->desc;
543      if (constantinfo_reserved)
544          *constantinfo_reserved = constant->constantinfo_reserved;
545      return D3D_OK;
546  }
547  
548  static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set,
549          struct const_upload_info *info)
550  {
551      struct d3dx_parameter *param = const_set->param;
552      unsigned int table = const_set->table;
553  
554      info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS)
555              || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS);
556      if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS)
557      {
558          info->major = param->columns;
559          info->minor = param->rows;
560      }
561      else
562      {
563          info->major = param->rows;
564          info->minor = param->columns;
565      }
566  
567      if (get_reg_components(table) == 1)
568      {
569          unsigned int const_length = get_offset_reg(table, const_set->register_count);
570  
571          info->major_stride = info->minor;
572          info->major_count = const_length / info->major_stride;
573          info->minor_remainder = const_length % info->major_stride;
574      }
575      else
576      {
577          info->major_stride = get_reg_components(table);
578          info->major_count = const_set->register_count;
579          info->minor_remainder = 0;
580      }
581      info->count = info->major_count * info->minor + info->minor_remainder;
582  }
583  
584  #define INITIAL_CONST_SET_SIZE 16
585  
586  static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set)
587  {
588      if (const_tab->const_set_count >= const_tab->const_set_size)
589      {
590          unsigned int new_size;
591          struct d3dx_const_param_eval_output *new_alloc;
592  
593          if (!const_tab->const_set_size)
594          {
595              new_size = INITIAL_CONST_SET_SIZE;
596              new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size);
597              if (!new_alloc)
598              {
599                  ERR("Out of memory.\n");
600                  return E_OUTOFMEMORY;
601              }
602          }
603          else
604          {
605              new_size = const_tab->const_set_size * 2;
606              new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set,
607                      sizeof(*const_tab->const_set) * new_size);
608              if (!new_alloc)
609              {
610                  ERR("Out of memory.\n");
611                  return E_OUTOFMEMORY;
612              }
613          }
614          const_tab->const_set = new_alloc;
615          const_tab->const_set_size = new_size;
616      }
617      const_tab->const_set[const_tab->const_set_count++] = *set;
618      return D3D_OK;
619  }
620  
621  static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab,
622          struct d3dx_preshader *pres)
623  {
624      unsigned int i;
625      struct d3dx_const_param_eval_output const_set = {NULL};
626  
627      for (i = 0; i < pres->ins_count; ++i)
628      {
629          const struct d3dx_pres_ins *ins = &pres->ins[i];
630          const struct d3dx_pres_reg *reg = &ins->output.reg;
631  
632          if (reg->table == PRES_REGTAB_TEMP)
633              continue;
634  
635          const_set.register_index = get_reg_offset(reg->table, reg->offset);
636          const_set.register_count = 1;
637          const_set.table = reg->table;
638          const_set.constant_class = D3DXPC_FORCE_DWORD;
639          const_set.element_count = 1;
640          append_const_set(const_tab, &const_set);
641      }
642  }
643  
644  static int compare_const_set(const void *a, const void *b)
645  {
646      const struct d3dx_const_param_eval_output *r1 = a;
647      const struct d3dx_const_param_eval_output *r2 = b;
648  
649      if (r1->table != r2->table)
650          return r1->table - r2->table;
651      return r1->register_index - r2->register_index;
652  }
653  
/* Collapses runs of adjacent constant set entries, starting at position
 * index, into single entries.
 *
 * Entries are merged into the first entry of a run (first_const) while each
 * following entry uses the same register table, starts at the register
 * offset where the previous one ended, reads parameter data located exactly
 * where the previous entry's data ended, has the same direct_copy setting
 * and - unless it is a direct copy - has the same parameter type, class,
 * dimensions and register count as the first entry.
 *
 * param is the parent parameter whose children produced the entries; it is
 * used for tracing and may replace the parameter of a merged direct copy
 * entry (see below).
 *
 * Returns D3D_OK, or D3DERR_INVALIDCALL if a merged direct copy run would
 * be attributed to a parent parameter of unsupported type. */
654  static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab,
655          struct d3dx_parameter *param, unsigned int index)
656  {
657      unsigned int i, start_index = index;
658      DWORD *current_data;
659      enum pres_reg_tables current_table;
660      unsigned int current_start_offset, element_count;
661      struct d3dx_const_param_eval_output *first_const;
662  
         /* Guards the unsigned "count - 1" in the loop condition below. */
663      if (!const_tab->const_set_count)
664          return D3D_OK;
665  
666      while (index < const_tab->const_set_count - 1)
667      {
668          first_const = &const_tab->const_set[index];
669          current_data = first_const->param->data;
670          current_table = first_const->table;
671          current_start_offset = get_offset_reg(current_table, first_const->register_index);
672          element_count = 0;
             /* Find the end of the mergeable run; the first iteration compares
              * first_const with itself. On exit i is one past the last entry
              * of the run. */
673          for (i = index; i < const_tab->const_set_count; ++i)
674          {
675              struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i];
676              unsigned int count = get_offset_reg(const_set->table,
677                      const_set->register_count * const_set->element_count);
678              unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index);
679  
680              if (!(const_set->table == current_table && current_start_offset == start_offset
681                      && const_set->direct_copy == first_const->direct_copy
682                      && current_data == const_set->param->data
683                      && (const_set->direct_copy || (first_const->param->type == const_set->param->type
684                      && first_const->param->class == const_set->param->class
685                      && first_const->param->columns == const_set->param->columns
686                      && first_const->param->rows == const_set->param->rows
687                      && first_const->register_count == const_set->register_count
688                      && (i == const_tab->const_set_count - 1
689                      || first_const->param->element_count == const_set->param->element_count)))))
690                  break;
691  
                 /* Advance the expected register offset and data pointer to
                  * where the next entry of the run has to start. */
692              current_start_offset += count;
693              current_data += const_set->direct_copy ? count : const_set->param->rows
694                      * const_set->param->columns * const_set->element_count;
695              element_count += const_set->element_count;
696          }
697  
             /* Only runs of at least two entries are merged. */
698          if (i > index + 1)
699          {
700              TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index,
701                      debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy);
702  
703              first_const->element_count = element_count;
704              if (first_const->direct_copy)
705              {
                     /* A merged direct copy becomes one entry covering the
                      * whole register range of the run. */
706                  first_const->element_count = 1;
                     /* For the first run, attribute the entry to the parent
                      * parameter unless the parent is a struct. */
707                  if (index == start_index
708                          && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT))
709                  {
710                      if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
711                          return D3DERR_INVALIDCALL;
712                      first_const->param = param;
713                  }
714                  first_const->register_count = get_reg_offset(current_table, current_start_offset)
715                          - first_const->register_index;
716              }
                 /* Drop the merged entries by moving the tail of the array
                  * directly behind first_const. */
717              memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i],
718                      sizeof(*const_tab->const_set) * (const_tab->const_set_count - i));
719              const_tab->const_set_count -= i - index - 1;
720          }
721          else
722          {
723              TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n",
724                      const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy);
725          }
             /* NOTE(review): after a merge the remaining entries now start at
              * index + 1, but the scan resumes at the pre-merge position i, so
              * entries moved into [index + 1, i) are not examined for further
              * merging - confirm this is intended. */
726          index = i;
727      }
728      return D3D_OK;
729  }
730  
731  static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab,
732          D3DXHANDLE hc, struct d3dx_parameter *param)
733  {
734      D3DXCONSTANT_DESC desc;
735      unsigned int const_count, param_count, i;
736      BOOL get_element;
737      struct d3dx_const_param_eval_output const_set;
738      struct const_upload_info info;
739      enum pres_value_type table_type;
740      HRESULT hr;
741  
742      if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL)))
743          return D3DERR_INVALIDCALL;
744  
745      if (param->element_count)
746      {
747          param_count = param->element_count;
748          const_count = desc.Elements;
749          get_element = TRUE;
750      }
751      else
752      {
753          if (desc.Elements > 1)
754          {
755              FIXME("Unexpected number of constant elements %u.\n", desc.Elements);
756              return D3DERR_INVALIDCALL;
757          }
758          param_count = param->member_count;
759          const_count = desc.StructMembers;
760          get_element = FALSE;
761      }
762      if (const_count != param_count)
763      {
764          FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n",
765                  param_count, const_count);
766          return D3DERR_INVALIDCALL;
767      }
768      if (const_count)
769      {
770          HRESULT ret = D3D_OK;
771          D3DXHANDLE hc_element;
772          unsigned int index = const_tab->const_set_count;
773  
774          for (i = 0; i < const_count; ++i)
775          {
776              if (get_element)
777                  hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i);
778              else
779                  hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i);
780              if (!hc_element)
781              {
782                  FIXME("Could not get constant.\n");
783                  hr = D3DERR_INVALIDCALL;
784              }
785              else
786              {
787                  hr = init_set_constants_param(const_tab, ctab, hc_element, &param->members[i]);
788              }
789              if (FAILED(hr))
790                  ret = hr;
791          }
792          if (FAILED(ret))
793              return ret;
794          return merge_const_set_entries(const_tab, param, index);
795      }
796  
797      TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n",
798              debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes);
799      TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n",
800              debugstr_a(param->name), param->rows, param->columns, param->class,
801              param->flags, param->bytes);
802  
803      const_set.element_count = 1;
804      const_set.param = param;
805      const_set.constant_class = desc.Class;
806      if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table))
807      {
808          FIXME("Unknown register set %u.\n", desc.RegisterSet);
809          return D3DERR_INVALIDCALL;
810      }
811      const_set.register_index = desc.RegisterIndex;
812      const_set.table = const_tab->regset2table[desc.RegisterSet];
813      if (const_set.table >= PRES_REGTAB_COUNT)
814      {
815          ERR("Unexpected register set %u.\n", desc.RegisterSet);
816          return D3DERR_INVALIDCALL;
817      }
818      assert(table_info[const_set.table].component_size == sizeof(unsigned int));
819      assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int));
820      const_set.register_count = desc.RegisterCount;
821      table_type = table_info[const_set.table].type;
822      get_const_upload_info(&const_set, &info);
823      if (!info.count)
824      {
825          TRACE("%s has zero count, skipping.\n", debugstr_a(param->name));
826          return D3D_OK;
827      }
828  
829      if (table_type_from_param_type(param->type) == PRES_VT_COUNT)
830          return D3DERR_INVALIDCALL;
831  
832      const_set.direct_copy = table_type_from_param_type(param->type) == table_type
833              && !info.transpose && info.minor == info.major_stride
834              && info.count == get_offset_reg(const_set.table, const_set.register_count)
835              && info.count * sizeof(unsigned int) <= param->bytes;
836      if (info.minor_remainder && !const_set.direct_copy && !info.transpose)
837          FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n",
838                  debugstr_a(param->name));
839  
840      if (info.major_count > info.major
841              || (info.major_count == info.major && info.minor_remainder))
842      {
843          WARN("Constant dimensions exceed parameter size.\n");
844          return D3DERR_INVALIDCALL;
845      }
846  
847      if (FAILED(hr = append_const_set(const_tab, &const_set)))
848          return hr;
849  
850      return D3D_OK;
851  }
852  
853  static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out,
854          struct d3dx9_base_effect *base, const char **skip_constants,
855          unsigned int skip_constants_count, struct d3dx_preshader *pres)
856  {
857      ID3DXConstantTable *ctab;
858      D3DXCONSTANT_DESC *cdesc;
859      struct d3dx_parameter **inputs_param;
860      D3DXCONSTANTTABLE_DESC desc;
861      HRESULT hr;
862      D3DXHANDLE hc;
863      unsigned int i, j;
864  
865      hr = D3DXGetShaderConstantTable(byte_code, &ctab);
866      if (FAILED(hr) || !ctab)
867      {
868          TRACE("Could not get CTAB data, hr %#x.\n", hr);
869          /* returning OK, shaders and preshaders without CTAB are valid */
870          return D3D_OK;
871      }
872      if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc)))
873      {
874          FIXME("Could not get CTAB desc, hr %#x.\n", hr);
875          goto cleanup;
876      }
877  
878      out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants);
879      out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants);
880      if (!cdesc || !inputs_param)
881      {
882          hr = E_OUTOFMEMORY;
883          goto cleanup;
884      }
885  
886      for (i = 0; i < desc.Constants; ++i)
887      {
888          unsigned int index = out->input_count;
889          WORD constantinfo_reserved;
890  
891          hc = ID3DXConstantTable_GetConstant(ctab, NULL, i);
892          if (!hc)
893          {
894              FIXME("Null constant handle.\n");
895              goto cleanup;
896          }
897          if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved)))
898              goto cleanup;
899          inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name);
900          if (!inputs_param[index])
901          {
902              WARN("Could not find parameter %s in effect.\n", cdesc[index].Name);
903              continue;
904          }
905          if (cdesc[index].Class == D3DXPC_OBJECT)
906          {
907              TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]);
908              if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT
909                      || !is_param_type_sampler(inputs_param[index]->type))
910              {
911                  WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name));
912                  hr = D3DERR_INVALIDCALL;
913                  goto cleanup;
914              }
915              if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount)
916              {
917                  WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name));
918                  hr = D3DERR_INVALIDCALL;
919                  goto cleanup;
920              }
921          }
922          if (!is_top_level_parameter(inputs_param[index]))
923          {
924              WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name));
925              hr = E_FAIL;
926              goto cleanup;
927          }
928  
929          for (j = 0; j < skip_constants_count; ++j)
930          {
931              if (!strcmp(cdesc[index].Name, skip_constants[j]))
932              {
933                  if (!constantinfo_reserved)
934                  {
935                      WARN("skip_constants parameter %s is not register bound.\n",
936                              cdesc[index].Name);
937                      hr = D3DERR_INVALIDCALL;
938                      goto cleanup;
939                  }
940                  TRACE("Skipping constant %s.\n", cdesc[index].Name);
941                  break;
942              }
943          }
944          if (j < skip_constants_count)
945              continue;
946          ++out->input_count;
947          if (inputs_param[index]->class == D3DXPC_OBJECT)
948              continue;
949          if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index])))
950              goto cleanup;
951      }
952      if (pres)
953          append_pres_const_sets_for_shader_input(out, pres);
954      if (out->const_set_count)
955      {
956          struct d3dx_const_param_eval_output *new_alloc;
957  
958          qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set);
959  
960          i = 0;
961          while (i < out->const_set_count - 1)
962          {
963              if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD
964                      && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD
965                      && out->const_set[i].table == out->const_set[i + 1].table
966                      && out->const_set[i].register_index + out->const_set[i].register_count
967                      >= out->const_set[i + 1].register_index)
968              {
969                  assert(out->const_set[i].register_index + out->const_set[i].register_count
970                          <= out->const_set[i + 1].register_index + 1);
971                  out->const_set[i].register_count = out->const_set[i + 1].register_index + 1
972                          - out->const_set[i].register_index;
973                  memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i])
974                          * (out->const_set_count - i - 2));
975                  --out->const_set_count;
976              }
977              else
978              {
979                  ++i;
980              }
981          }
982  
983          new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set,
984                  sizeof(*out->const_set) * out->const_set_count);
985          if (new_alloc)
986          {
987              out->const_set = new_alloc;
988              out->const_set_size = out->const_set_count;
989          }
990          else
991          {
992              WARN("Out of memory.\n");
993          }
994      }
995  cleanup:
996      ID3DXConstantTable_Release(ctab);
997      return hr;
998  }
999  
1000  static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register)
1001  {
1002      if (table < PRES_REGTAB_COUNT)
1003          table_sizes[table] = max(table_sizes[table], max_register + 1);
1004  }
1005  
1006  static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab)
1007  {
1008      unsigned int i, table, max_register;
1009  
1010      for (i = 0; i < ctab->input_count; ++i)
1011      {
1012          if (!ctab->inputs[i].RegisterCount)
1013              continue;
1014          max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1;
1015          table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1016          update_table_size(table_sizes, table, max_register);
1017      }
1018  }
1019  
1020  static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count)
1021  {
1022      static const char *xyzw_str = "xyzw";
1023      unsigned int i, table;
1024  
1025      table = arg->reg.table;
1026      if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT)
1027      {
1028          TRACE("(");
1029          for (i = 0; i < component_count; ++i)
1030              TRACE(i < component_count - 1 ? "%.16e, " : "%.16e",
1031                      ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]);
1032          TRACE(")");
1033      }
1034      else
1035      {
1036          if (arg->index_reg.table == PRES_REGTAB_COUNT)
1037          {
1038              TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset));
1039          }
1040          else
1041          {
1042              unsigned int index_reg;
1043  
1044              index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset);
1045              TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset),
1046                      table_symbol[arg->index_reg.table], index_reg,
1047                      xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]);
1048          }
1049          for (i = 0; i < component_count; ++i)
1050              TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]);
1051      }
1052  }
1053  
1054  static void dump_registers(struct d3dx_const_tab *ctab)
1055  {
1056      unsigned int table, i;
1057  
1058      for (i = 0; i < ctab->input_count; ++i)
1059      {
1060          table = ctab->regset2table[ctab->inputs[i].RegisterSet];
1061          TRACE("//   %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)",
1062                  table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount);
1063      }
1064  }
1065  
1066  static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins)
1067  {
1068      unsigned int i;
1069  
1070      TRACE("%s ", pres_op_info[ins->op].mnem);
1071      dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count);
1072      for (i = 0; i < pres_op_info[ins->op].input_count; ++i)
1073      {
1074          TRACE(", ");
1075          dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count);
1076      }
1077      TRACE("\n");
1078  }
1079  
1080  static void dump_preshader(struct d3dx_preshader *pres)
1081  {
1082      unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4;
1083      const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED];
1084  
1085      if (immediate_count)
1086          TRACE("// Immediates:\n");
1087      for (i = 0; i < immediate_count; ++i)
1088      {
1089          if (!(i % 4))
1090              TRACE("// ");
1091          TRACE("%.8e", immediates[i]);
1092          if (i % 4 == 3)
1093              TRACE("\n");
1094          else
1095              TRACE(", ");
1096      }
1097      TRACE("// Preshader registers:\n");
1098      dump_registers(&pres->inputs);
1099      TRACE("preshader\n");
1100      for (i = 0; i < pres->ins_count; ++i)
1101          dump_ins(&pres->regs, &pres->ins[i]);
1102  }
1103  
1104  static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base)
1105  {
1106      unsigned int *p;
1107      unsigned int i, j, const_count;
1108      double *dconst;
1109      HRESULT hr;
1110      unsigned int saved_word;
1111      unsigned int section_size;
1112  
1113      TRACE("Preshader version %#x.\n", *ptr & 0xffff);
1114  
1115      if (!count)
1116      {
1117          WARN("Unexpected end of byte code buffer.\n");
1118          return D3DXERR_INVALIDDATA;
1119      }
1120  
1121      p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, &section_size);
1122      if (p)
1123      {
1124          const_count = *p++;
1125          if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int)))
1126          {
1127              WARN("Byte code buffer ends unexpectedly.\n");
1128              return D3DXERR_INVALIDDATA;
1129          }
1130          dconst = (double *)p;
1131      }
1132      else
1133      {
1134          const_count = 0;
1135          dconst = NULL;
1136      }
1137      TRACE("%u double constants.\n", const_count);
1138  
1139      p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, &section_size);
1140      if (!p)
1141      {
1142          WARN("Could not find preshader code.\n");
1143          return D3D_OK;
1144      }
1145      pres->ins_count = *p++;
1146      --section_size;
1147      if (pres->ins_count > UINT_MAX / sizeof(*pres->ins))
1148      {
1149          WARN("Invalid instruction count %u.\n", pres->ins_count);
1150          return D3DXERR_INVALIDDATA;
1151      }
1152      TRACE("%u instructions.\n", pres->ins_count);
1153      pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count);
1154      if (!pres->ins)
1155          return E_OUTOFMEMORY;
1156      for (i = 0; i < pres->ins_count; ++i)
1157      {
1158          unsigned int *ptr_next;
1159  
1160          ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]);
1161          if (!ptr_next)
1162              return D3DXERR_INVALIDDATA;
1163          section_size -= ptr_next - p;
1164          p = ptr_next;
1165      }
1166  
1167      pres->inputs.regset2table = pres_regset2table;
1168  
1169      saved_word = *ptr;
1170      *ptr = 0xfffe0000;
1171      hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL);
1172      *ptr = saved_word;
1173      if (FAILED(hr))
1174          return hr;
1175  
1176      if (const_count % get_reg_components(PRES_REGTAB_IMMED))
1177      {
1178          FIXME("const_count %u is not a multiple of %u.\n", const_count,
1179                  get_reg_components(PRES_REGTAB_IMMED));
1180          return D3DXERR_INVALIDDATA;
1181      }
1182      pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count);
1183  
1184      update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs);
1185      for (i = 0; i < pres->ins_count; ++i)
1186      {
1187          for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j)
1188          {
1189              enum pres_reg_tables table;
1190              unsigned int reg_idx;
1191  
1192              if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT)
1193              {
1194                  unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0
1195                          : pres->ins[i].component_count - 1;
1196  
1197                  table = pres->ins[i].inputs[j].reg.table;
1198                  reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset
1199                          + last_component_index);
1200              }
1201              else
1202              {
1203                  table = pres->ins[i].inputs[j].index_reg.table;
1204                  reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset);
1205              }
1206              if (reg_idx >= pres->regs.table_sizes[table])
1207              {
1208                  FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u.\n",
1209                          i, j, table, reg_idx);
1210                  return D3DXERR_INVALIDDATA;
1211              }
1212          }
1213          update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table,
1214                  get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset));
1215      }
1216      if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED)))
1217          return E_OUTOFMEMORY;
1218      regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count);
1219  
1220      return D3D_OK;
1221  }
1222  
1223  HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size,
1224          D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter,
1225          const char **skip_constants, unsigned int skip_constants_count)
1226  {
1227      struct d3dx_param_eval *peval;
1228      unsigned int *ptr, *shader_ptr = NULL;
1229      unsigned int i;
1230      BOOL shader;
1231      unsigned int count, pres_size;
1232      HRESULT ret;
1233  
1234      TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n",
1235              base_effect, byte_code, byte_code_size, type, peval_out);
1236  
1237      count = byte_code_size / sizeof(unsigned int);
1238      if (!byte_code || !count)
1239      {
1240          *peval_out = NULL;
1241          return D3D_OK;
1242      }
1243  
1244      peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval));
1245      if (!peval)
1246      {
1247          ret = E_OUTOFMEMORY;
1248          goto err_out;
1249      }
1250      peval->version_counter = version_counter;
1251  
1252      peval->param_type = type;
1253      switch (type)
1254      {
1255          case D3DXPT_VERTEXSHADER:
1256          case D3DXPT_PIXELSHADER:
1257              shader = TRUE;
1258              break;
1259          default:
1260              shader = FALSE;
1261              break;
1262      }
1263      peval->shader_inputs.regset2table = shad_regset2table;
1264  
1265      ptr = (unsigned int *)byte_code;
1266      if (shader)
1267      {
1268          if ((*ptr & 0xfffe0000) != 0xfffe0000)
1269          {
1270              FIXME("Invalid shader signature %#x.\n", *ptr);
1271              ret = D3DXERR_INVALIDDATA;
1272              goto err_out;
1273          }
1274          TRACE("Shader version %#x.\n", *ptr & 0xffff);
1275          shader_ptr = ptr;
1276          ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size);
1277          if (!ptr)
1278              TRACE("No preshader found.\n");
1279      }
1280      else
1281      {
1282          pres_size = count;
1283      }
1284  
1285      if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect)))
1286      {
1287          FIXME("Failed parsing preshader, byte code for analysis follows.\n");
1288          dump_bytecode(byte_code, byte_code_size);
1289          goto err_out;
1290      }
1291  
1292      if (shader)
1293      {
1294          if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect,
1295                  skip_constants, skip_constants_count, &peval->pres)))
1296          {
1297              TRACE("Could not get shader constant table, hr %#x.\n", ret);
1298              goto err_out;
1299          }
1300          update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs);
1301      }
1302  
1303      for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i)
1304      {
1305          if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i)))
1306              goto err_out;
1307      }
1308  
1309      if (TRACE_ON(d3dx))
1310      {
1311          dump_bytecode(byte_code, byte_code_size);
1312          dump_preshader(&peval->pres);
1313          if (shader)
1314          {
1315              TRACE("// Shader registers:\n");
1316              dump_registers(&peval->shader_inputs);
1317          }
1318      }
1319      *peval_out = peval;
1320      TRACE("Created parameter evaluator %p.\n", *peval_out);
1321      return D3D_OK;
1322  
1323  err_out:
1324      WARN("Error creating parameter evaluator.\n");
1325      if (TRACE_ON(d3dx))
1326          dump_bytecode(byte_code, byte_code_size);
1327  
1328      d3dx_free_param_eval(peval);
1329      *peval_out = NULL;
1330      return ret;
1331  }
1332  
1333  static void d3dx_free_const_tab(struct d3dx_const_tab *ctab)
1334  {
1335      HeapFree(GetProcessHeap(), 0, ctab->inputs);
1336      HeapFree(GetProcessHeap(), 0, ctab->inputs_param);
1337      HeapFree(GetProcessHeap(), 0, ctab->const_set);
1338  }
1339  
1340  static void d3dx_free_preshader(struct d3dx_preshader *pres)
1341  {
1342      HeapFree(GetProcessHeap(), 0, pres->ins);
1343  
1344      regstore_free_tables(&pres->regs);
1345      d3dx_free_const_tab(&pres->inputs);
1346  }
1347  
1348  void d3dx_free_param_eval(struct d3dx_param_eval *peval)
1349  {
1350      TRACE("peval %p.\n", peval);
1351  
1352      if (!peval)
1353          return;
1354  
1355      d3dx_free_preshader(&peval->pres);
1356      d3dx_free_const_tab(&peval->shader_inputs);
1357      HeapFree(GetProcessHeap(), 0, peval);
1358  }
1359  
/* Convert 'count' float values from 'in' to int values in 'out' using the implicit
 * C float to int conversion. Elements are processed front to back. */
static void pres_int_from_float(void *out, const void *in, unsigned int count)
{
    const float *src = in;
    int *dst = out;

    while (count--)
        *dst++ = *src++;
}
1369  
1370  static void pres_bool_from_value(void *out, const void *in, unsigned int count)
1371  {
1372      unsigned int i;
1373      const DWORD *in_dword = in;
1374      BOOL *out_bool = out;
1375  
1376      for (i = 0; i < count; ++i)
1377          out_bool[i] = !!in_dword[i];
1378  }
1379  
/* Convert 'count' int values from 'in' to float values in 'out'.
 * Elements are processed front to back. */
static void pres_float_from_int(void *out, const void *in, unsigned int count)
{
    const int *src = in;
    float *dst = out;

    while (count--)
        *dst++ = *src++;
}
1389  
1390  static void pres_float_from_bool(void *out, const void *in, unsigned int count)
1391  {
1392      unsigned int i;
1393      const BOOL *in_bool = in;
1394      float *out_float = out;
1395  
1396      for (i = 0; i < count; ++i)
1397          out_float[i] = !!in_bool[i];
1398  }
1399  
/*
 * Convert 'count' boolean values from 'in' to int values in 'out': non-zero becomes 1,
 * zero becomes 0.
 *
 * The input is read as raw 32 bit integers. Reading it through a float pointer would
 * compare floating point values against zero, so the bit pattern 0x80000000 (-0.0f)
 * would be treated as false although it is a non-zero boolean.
 * Elements are processed front to back, so 'in' and 'out' may refer to the same buffer.
 */
static void pres_int_from_bool(void *out, const void *in, unsigned int count)
{
    unsigned int i;
    const unsigned int *in_bool = in;
    int *out_int = out;

    for (i = 0; i < count; ++i)
        out_int[i] = !!in_bool[i];
}
1409  
1410  static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table,
1411          unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type)
1412  {
1413      typedef void (*conv_func)(void *out, const void *in, unsigned int count);
1414      static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] =
1415      {
1416          {NULL,                 NULL, pres_int_from_float, pres_bool_from_value},
1417          {NULL,                 NULL, NULL,                NULL},
1418          {pres_float_from_int,  NULL, NULL,                pres_bool_from_value},
1419          {pres_float_from_bool, NULL, pres_int_from_bool,  NULL}
1420      };
1421      enum pres_value_type table_type = table_info[table].type;
1422  
1423      if (param_type == table_type)
1424      {
1425          regstore_set_values(rs, table, in, offset, count);
1426          return;
1427      }
1428  
1429      set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count);
1430  }
1431  
1432  static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1433          D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr,
1434          unsigned int start, unsigned int count)
1435  {
1436      if (type == D3DXPT_VERTEXSHADER)
1437      {
1438          switch(table)
1439          {
1440              case PRES_REGTAB_OCONST:
1441                  return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count);
1442              case PRES_REGTAB_OICONST:
1443                  return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count);
1444              case PRES_REGTAB_OBCONST:
1445                  return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count);
1446              default:
1447                  FIXME("Unexpected register table %u.\n", table);
1448                  return D3DERR_INVALIDCALL;
1449          }
1450      }
1451      else if (type == D3DXPT_PIXELSHADER)
1452      {
1453          switch(table)
1454          {
1455              case PRES_REGTAB_OCONST:
1456                  return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count);
1457              case PRES_REGTAB_OICONST:
1458                  return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count);
1459              case PRES_REGTAB_OBCONST:
1460                  return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count);
1461              default:
1462                  FIXME("Unexpected register table %u.\n", table);
1463                  return D3DERR_INVALIDCALL;
1464          }
1465      }
1466      else
1467      {
1468          FIXME("Unexpected parameter type %u.\n", type);
1469          return D3DERR_INVALIDCALL;
1470      }
1471  }
1472  
1473  static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab,
1474          ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1475          D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty)
1476  {
1477      unsigned int const_idx;
1478      unsigned int current_start = 0, current_count = 0;
1479      enum pres_reg_tables current_table = PRES_REGTAB_COUNT;
1480      BOOL update_device = manager || device;
1481      HRESULT hr, result = D3D_OK;
1482      ULONG64 update_version = const_tab->update_version;
1483  
1484      for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1485      {
1486          struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1487          enum pres_reg_tables table = const_set->table;
1488          struct d3dx_parameter *param = const_set->param;
1489          unsigned int element, i, j, start_offset;
1490          struct const_upload_info info;
1491          unsigned int *data;
1492          enum pres_value_type param_type;
1493  
1494          if (!(param && is_param_dirty(param, update_version)))
1495              continue;
1496  
1497          data = param->data;
1498          start_offset = get_offset_reg(table, const_set->register_index);
1499          if (const_set->direct_copy)
1500          {
1501              regstore_set_values(rs, table, data, start_offset,
1502                      get_offset_reg(table, const_set->register_count));
1503              continue;
1504          }
1505          param_type = table_type_from_param_type(param->type);
1506          if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR)
1507          {
1508              unsigned int count = max(param->rows, param->columns);
1509  
1510              if (count >= get_reg_components(table))
1511              {
1512                  regstore_set_data(rs, table, start_offset, data,
1513                          count * const_set->element_count, param_type);
1514              }
1515              else
1516              {
1517                  for (element = 0; element < const_set->element_count; ++element)
1518                      regstore_set_data(rs, table, start_offset + get_offset_reg(table, element),
1519                              &data[element * count], count, param_type);
1520              }
1521              continue;
1522          }
1523          get_const_upload_info(const_set, &info);
1524          for (element = 0; element < const_set->element_count; ++element)
1525          {
1526              unsigned int *out = (unsigned int *)rs->tables[table] + start_offset;
1527  
1528              /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer.
1529               * All the supported types of parameters and table values have the same size. */
1530              if (info.transpose)
1531              {
1532                  for (i = 0; i < info.major_count; ++i)
1533                      for (j = 0; j < info.minor; ++j)
1534                          out[i * info.major_stride + j] = data[i + j * info.major];
1535  
1536                  for (j = 0; j < info.minor_remainder; ++j)
1537                      out[i * info.major_stride + j] = data[i + j * info.major];
1538              }
1539              else
1540              {
1541                  for (i = 0; i < info.major_count; ++i)
1542                      for (j = 0; j < info.minor; ++j)
1543                          out[i * info.major_stride + j] = data[i * info.minor + j];
1544              }
1545              start_offset += get_offset_reg(table, const_set->register_count);
1546              data += param->rows * param->columns;
1547          }
1548          start_offset = get_offset_reg(table, const_set->register_index);
1549          if (table_info[table].type != param_type)
1550              regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset,
1551                      get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type);
1552      }
1553      const_tab->update_version = new_update_version;
1554      if (!update_device)
1555          return D3D_OK;
1556  
1557      for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx)
1558      {
1559          struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx];
1560  
1561          if (device_update_all || (const_set->param
1562                  ? is_param_dirty(const_set->param, update_version) : pres_dirty))
1563          {
1564              enum pres_reg_tables table = const_set->table;
1565  
1566              if (table == current_table && current_start + current_count == const_set->register_index)
1567              {
1568                  current_count += const_set->register_count * const_set->element_count;
1569              }
1570              else
1571              {
1572                  if (current_count)
1573                  {
1574                      if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1575                              (DWORD *)rs->tables[current_table]
1576                              + get_offset_reg(current_table, current_start), current_start, current_count)))
1577                          result = hr;
1578                  }
1579                  current_table = table;
1580                  current_start = const_set->register_index;
1581                  current_count = const_set->register_count * const_set->element_count;
1582              }
1583          }
1584      }
1585      if (current_count)
1586      {
1587          if (FAILED(hr = set_constants_device(manager, device, type, current_table,
1588                  (DWORD *)rs->tables[current_table]
1589                  + get_offset_reg(current_table, current_start), current_start, current_count)))
1590              result = hr;
1591      }
1592      return result;
1593  }
1594  
1595  static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset)
1596  {
1597      return regstore_get_double(rs, table, offset);
1598  }
1599  
1600  static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp)
1601  {
1602      unsigned int offset, base_index, reg_index, table;
1603  
1604      table = opr->reg.table;
1605  
1606      if (opr->index_reg.table == PRES_REGTAB_COUNT)
1607          base_index = 0;
1608      else
1609          base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset));
1610  
1611      offset = get_offset_reg(table, base_index) + opr->reg.offset + comp;
1612      reg_index = get_reg_offset(table, offset);
1613  
1614      if (reg_index >= rs->table_sizes[table])
1615      {
1616          unsigned int wrap_size;
1617  
1618          if (table == PRES_REGTAB_CONST)
1619          {
1620              /* As it can be guessed from tests, offset into floating constant table is wrapped
1621               * to the nearest power of 2 and not to the actual table size. */
1622              for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1)
1623                  ;
1624          }
1625          else
1626          {
1627              wrap_size = rs->table_sizes[table];
1628          }
1629          WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n",
1630                  reg_index, table, wrap_size, rs->table_sizes[table]);
1631          reg_index %= wrap_size;
1632  
1633          if (reg_index >= rs->table_sizes[table])
1634              return 0.0;
1635  
1636          offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table);
1637      }
1638  
1639      return exec_get_reg_value(rs, table, offset);
1640  }
1641  
1642  static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg,
1643          unsigned int comp, double res)
1644  {
1645      regstore_set_double(rs, reg->table, reg->offset + comp, res);
1646  }
1647  
1648  #define ARGS_ARRAY_SIZE 8
1649  static HRESULT execute_preshader(struct d3dx_preshader *pres)
1650  {
1651      unsigned int i, j, k;
1652      double args[ARGS_ARRAY_SIZE];
1653      double res;
1654  
1655      for (i = 0; i < pres->ins_count; ++i)
1656      {
1657          const struct d3dx_pres_ins *ins;
1658          const struct op_info *oi;
1659  
1660          ins = &pres->ins[i];
1661          oi = &pres_op_info[ins->op];
1662          if (oi->func_all_comps)
1663          {
1664              if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE)
1665              {
1666                  FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count);
1667                  return E_FAIL;
1668              }
1669              for (k = 0; k < oi->input_count; ++k)
1670                  for (j = 0; j < ins->component_count; ++j)
1671                      args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k],
1672                              ins->scalar_op && !k ? 0 : j);
1673              res = oi->func(args, ins->component_count);
1674  
1675              /* only 'dot' instruction currently falls here */
1676              exec_set_arg(&pres->regs, &ins->output.reg, 0, res);
1677          }
1678          else
1679          {
1680              for (j = 0; j < ins->component_count; ++j)
1681              {
1682                  for (k = 0; k < oi->input_count; ++k)
1683                      args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j);
1684                  res = oi->func(args, ins->component_count);
1685                  exec_set_arg(&pres->regs, &ins->output.reg, j, res);
1686              }
1687          }
1688      }
1689      return D3D_OK;
1690  }
1691  
1692  static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version)
1693  {
1694      unsigned int i;
1695  
1696      if (update_version == ULONG64_MAX)
1697          update_version = ctab->update_version;
1698      for (i = 0; i < ctab->input_count; ++i)
1699      {
1700          if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]),
1701                  update_version))
1702              return TRUE;
1703      }
1704      return FALSE;
1705  }
1706  
1707  BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version)
1708  {
1709      return is_const_tab_input_dirty(&peval->pres.inputs, update_version)
1710              || is_const_tab_input_dirty(&peval->shader_inputs, update_version);
1711  }
1712  
1713  HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param,
1714          void *param_value)
1715  {
1716      HRESULT hr;
1717      unsigned int i;
1718      unsigned int elements, elements_param, elements_table;
1719      float *oc;
1720  
1721      TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value);
1722  
1723      if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX))
1724      {
1725          set_constants(&peval->pres.regs, &peval->pres.inputs,
1726                  next_update_version(peval->version_counter),
1727                  NULL, NULL, peval->param_type, FALSE, FALSE);
1728  
1729          if (FAILED(hr = execute_preshader(&peval->pres)))
1730              return hr;
1731      }
1732  
1733      elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]);
1734      elements_param = param->bytes / sizeof(unsigned int);
1735      elements = min(elements_table, elements_param);
1736      oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST];
1737      for (i = 0; i < elements; ++i)
1738          set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT);
1739      return D3D_OK;
1740  }
1741  
1742  HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device,
1743          struct d3dx_param_eval *peval, BOOL update_all)
1744  {
1745      HRESULT hr;
1746      struct d3dx_preshader *pres = &peval->pres;
1747      struct d3dx_regstore *rs = &pres->regs;
1748      ULONG64 new_update_version = next_update_version(peval->version_counter);
1749      BOOL pres_dirty = FALSE;
1750  
1751      TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type);
1752  
1753      if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX))
1754      {
1755          set_constants(rs, &pres->inputs, new_update_version,
1756                  NULL, NULL, peval->param_type, FALSE, FALSE);
1757          if (FAILED(hr = execute_preshader(pres)))
1758              return hr;
1759          pres_dirty = TRUE;
1760      }
1761  
1762      return set_constants(rs, &peval->shader_inputs, new_update_version,
1763              manager, device, peval->param_type, update_all, pres_dirty);
1764  }
1765