1 #ifdef __REACTOS__ 2 #include "precomp.h" 3 #else 4 /* 5 * Copyright 2016 Paul Gofman 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with this library; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 20 */ 21 22 #include "config.h" 23 #include "wine/port.h" 24 25 #include "d3dx9_private.h" 26 27 #include <float.h> 28 #include <assert.h> 29 #endif /* __REACTOS__ */ 30 31 WINE_DEFAULT_DEBUG_CHANNEL(d3dx); 32 33 #ifdef __REACTOS__ 34 /* ReactOS FIXME: Insect */ 35 #define fmin min 36 #define fmax max 37 #endif 38 39 enum pres_ops 40 { 41 PRESHADER_OP_NOP, 42 PRESHADER_OP_MOV, 43 PRESHADER_OP_NEG, 44 PRESHADER_OP_RCP, 45 PRESHADER_OP_FRC, 46 PRESHADER_OP_EXP, 47 PRESHADER_OP_LOG, 48 PRESHADER_OP_RSQ, 49 PRESHADER_OP_SIN, 50 PRESHADER_OP_COS, 51 PRESHADER_OP_ASIN, 52 PRESHADER_OP_ACOS, 53 PRESHADER_OP_ATAN, 54 PRESHADER_OP_MIN, 55 PRESHADER_OP_MAX, 56 PRESHADER_OP_LT, 57 PRESHADER_OP_GE, 58 PRESHADER_OP_ADD, 59 PRESHADER_OP_MUL, 60 PRESHADER_OP_ATAN2, 61 PRESHADER_OP_DIV, 62 PRESHADER_OP_CMP, 63 PRESHADER_OP_DOT, 64 PRESHADER_OP_DOTSWIZ6, 65 PRESHADER_OP_DOTSWIZ8, 66 }; 67 68 typedef double (*pres_op_func)(double *args, int n); 69 70 static double to_signed_nan(double v) 71 { 72 static const union 73 { 74 ULONG64 ulong64_value; 75 double double_value; 76 } 77 signed_nan = 78 { 79 0xfff8000000000000 80 }; 81 82 return isnan(v) ? signed_nan.double_value : v; 83 } 84 85 static double pres_mov(double *args, int n) {return args[0];} 86 static double pres_add(double *args, int n) {return args[0] + args[1];} 87 static double pres_mul(double *args, int n) {return args[0] * args[1];} 88 static double pres_dot(double *args, int n) 89 { 90 int i; 91 double sum; 92 93 sum = 0.0; 94 for (i = 0; i < n; ++i) 95 sum += args[i] * args[i + n]; 96 return sum; 97 } 98 99 static double pres_dotswiz6(double *args, int n) 100 { 101 return pres_dot(args, 3); 102 } 103 104 static double pres_dotswiz8(double *args, int n) 105 { 106 return pres_dot(args, 4); 107 } 108 109 static double pres_neg(double *args, int n) {return -args[0];} 110 static double pres_rcp(double *args, int n) {return 1.0 / args[0];} 111 static double pres_lt(double *args, int n) {return args[0] < args[1] ? 1.0 : 0.0;} 112 static double pres_ge(double *args, int n) {return args[0] >= args[1] ? 1.0 : 0.0;} 113 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);} 114 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);} 115 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);} 116 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];} 117 static double pres_sin(double *args, int n) {return sin(args[0]);} 118 static double pres_cos(double *args, int n) {return cos(args[0]);} 119 static double pres_rsq(double *args, int n) 120 { 121 double v; 122 123 v = fabs(args[0]); 124 if (v == 0.0) 125 return INFINITY; 126 else 127 return 1.0 / sqrt(v); 128 } 129 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);} 130 static double pres_log(double *args, int n) 131 { 132 double v; 133 134 v = fabs(args[0]); 135 if (v == 0.0) 136 return 0.0; 137 else 138 #ifdef HAVE_LOG2 139 return log2(v); 140 #else 141 return log(v) / log(2); 142 #endif 143 } 144 static double pres_asin(double *args, int n) {return to_signed_nan(asin(args[0]));} 145 static double pres_acos(double *args, int n) {return to_signed_nan(acos(args[0]));} 146 static double pres_atan(double *args, int n) {return atan(args[0]);} 147 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);} 148 149 /* According to the test results 'div' operation always returns 0. Compiler does not seem to ever 150 * generate it, using rcp + mul instead, so probably it is not implemented in native d3dx. */ 151 static double pres_div(double *args, int n) {return 0.0;} 152 153 #define PRES_OPCODE_MASK 0x7ff00000 154 #define PRES_OPCODE_SHIFT 20 155 #define PRES_SCALAR_FLAG 0x80000000 156 #define PRES_NCOMP_MASK 0x0000ffff 157 158 #define FOURCC_PRES 0x53455250 159 #define FOURCC_CLIT 0x54494c43 160 #define FOURCC_FXLC 0x434c5846 161 #define FOURCC_PRSI 0x49535250 162 #define PRES_SIGN 0x46580000 163 164 struct op_info 165 { 166 unsigned int opcode; 167 char mnem[16]; 168 unsigned int input_count; 169 BOOL func_all_comps; 170 pres_op_func func; 171 }; 172 173 static const struct op_info pres_op_info[] = 174 { 175 {0x000, "nop", 0, 0, NULL }, /* PRESHADER_OP_NOP */ 176 {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */ 177 {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */ 178 {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */ 179 {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */ 180 {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */ 181 {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */ 182 {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */ 183 {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */ 184 {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */ 185 {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */ 186 {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */ 187 {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */ 188 {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */ 189 {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */ 190 {0x202, "lt", 2, 0, pres_lt }, /* PRESHADER_OP_LT */ 191 {0x203, "ge", 2, 0, pres_ge }, /* PRESHADER_OP_GE */ 192 {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */ 193 {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */ 194 {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */ 195 {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */ 196 {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */ 197 {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */ 198 {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */ 199 {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */ 200 }; 201 202 enum pres_value_type 203 { 204 PRES_VT_FLOAT, 205 PRES_VT_DOUBLE, 206 PRES_VT_INT, 207 PRES_VT_BOOL, 208 PRES_VT_COUNT 209 }; 210 211 static const struct 212 { 213 unsigned int component_size; 214 enum pres_value_type type; 215 } 216 table_info[] = 217 { 218 {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */ 219 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */ 220 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */ 221 {sizeof(BOOL), PRES_VT_BOOL }, /* PRES_REGTAB_OBCONST */ 222 {sizeof(int), PRES_VT_INT, }, /* PRES_REGTAB_OICONST */ 223 /* TODO: use double precision for 64 bit */ 224 {sizeof(float), PRES_VT_FLOAT } /* PRES_REGTAB_TEMP */ 225 }; 226 227 static const char *table_symbol[] = 228 { 229 "imm", "c", "oc", "ob", "oi", "r", "(null)", 230 }; 231 232 static const enum pres_reg_tables pres_regset2table[] = 233 { 234 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 235 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 236 PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */ 237 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 238 }; 239 240 static const enum pres_reg_tables shad_regset2table[] = 241 { 242 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 243 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 244 PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */ 245 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 246 }; 247 248 struct d3dx_pres_reg 249 { 250 enum pres_reg_tables table; 251 /* offset is component index, not register index, e. g. 252 offset for component c3.y is 13 (3 * 4 + 1) */ 253 unsigned int offset; 254 }; 255 256 struct d3dx_pres_operand 257 { 258 struct d3dx_pres_reg reg; 259 struct d3dx_pres_reg index_reg; 260 }; 261 262 #define MAX_INPUTS_COUNT 8 263 264 struct d3dx_pres_ins 265 { 266 enum pres_ops op; 267 /* first input argument is scalar, 268 scalar component is propagated */ 269 BOOL scalar_op; 270 unsigned int component_count; 271 struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT]; 272 struct d3dx_pres_operand output; 273 }; 274 275 struct const_upload_info 276 { 277 BOOL transpose; 278 unsigned int major, minor; 279 unsigned int major_stride; 280 unsigned int major_count; 281 unsigned int count; 282 unsigned int minor_remainder; 283 }; 284 285 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type) 286 { 287 switch (type) 288 { 289 case D3DXPT_FLOAT: 290 return PRES_VT_FLOAT; 291 case D3DXPT_INT: 292 return PRES_VT_INT; 293 case D3DXPT_BOOL: 294 return PRES_VT_BOOL; 295 default: 296 FIXME("Unsupported type %u.\n", type); 297 return PRES_VT_COUNT; 298 } 299 } 300 301 static unsigned int get_reg_offset(unsigned int table, unsigned int offset) 302 { 303 return table == PRES_REGTAB_OBCONST ? offset : offset >> 2; 304 } 305 306 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx) 307 { 308 return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2; 309 } 310 311 static unsigned int get_reg_components(unsigned int table) 312 { 313 return get_offset_reg(table, 1); 314 } 315 316 #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8) 317 318 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table) 319 { 320 unsigned int size; 321 322 size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size; 323 if (size) 324 { 325 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size); 326 if (!rs->tables[table]) 327 return E_OUTOFMEMORY; 328 } 329 return D3D_OK; 330 } 331 332 static void regstore_free_tables(struct d3dx_regstore *rs) 333 { 334 unsigned int i; 335 336 for (i = 0; i < PRES_REGTAB_COUNT; ++i) 337 { 338 HeapFree(GetProcessHeap(), 0, rs->tables[i]); 339 } 340 } 341 342 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data, 343 unsigned int start_offset, unsigned int count) 344 { 345 BYTE *dst = rs->tables[table]; 346 const BYTE *src = data; 347 unsigned int size; 348 349 dst += start_offset * table_info[table].component_size; 350 size = count * table_info[table].component_size; 351 assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst)); 352 memcpy(dst, src, size); 353 } 354 355 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset) 356 { 357 BYTE *p; 358 359 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 360 switch (table_info[table].type) 361 { 362 case PRES_VT_FLOAT: 363 return *(float *)p; 364 case PRES_VT_DOUBLE: 365 return *(double *)p; 366 default: 367 FIXME("Unexpected preshader input from table %u.\n", table); 368 return NAN; 369 } 370 } 371 372 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v) 373 { 374 BYTE *p; 375 376 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 377 switch (table_info[table].type) 378 { 379 case PRES_VT_FLOAT : *(float *)p = v; break; 380 case PRES_VT_DOUBLE: *(double *)p = v; break; 381 case PRES_VT_INT : *(int *)p = lrint(v); break; 382 case PRES_VT_BOOL : *(BOOL *)p = !!v; break; 383 default: 384 FIXME("Bad type %u.\n", table_info[table].type); 385 break; 386 } 387 } 388 389 static void dump_bytecode(void *data, unsigned int size) 390 { 391 unsigned int *bytecode = (unsigned int *)data; 392 unsigned int i, j, n; 393 394 size /= sizeof(*bytecode); 395 i = 0; 396 while (i < size) 397 { 398 n = min(size - i, 8); 399 for (j = 0; j < n; ++j) 400 TRACE("0x%08x,", bytecode[i + j]); 401 i += n; 402 TRACE("\n"); 403 } 404 } 405 406 static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count, 407 unsigned int fourcc, unsigned int *size) 408 { 409 /* Provide at least one value in comment section on non-NULL return. */ 410 while (count > 2 && (*ptr & 0xffff) == 0xfffe) 411 { 412 unsigned int section_size; 413 414 section_size = (*ptr >> 16); 415 if (!section_size || section_size + 1 > count) 416 break; 417 if (*(ptr + 1) == fourcc) 418 { 419 *size = section_size; 420 return ptr + 2; 421 } 422 count -= section_size + 1; 423 ptr += section_size + 1; 424 } 425 return NULL; 426 } 427 428 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg) 429 { 430 static const enum pres_reg_tables reg_table[8] = 431 { 432 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT, 433 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP 434 }; 435 436 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT) 437 { 438 FIXME("Unsupported register table %#x.\n", *ptr); 439 return NULL; 440 } 441 442 reg->table = reg_table[*ptr++]; 443 reg->offset = *ptr++; 444 return ptr; 445 } 446 447 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr) 448 { 449 if (count < 3 || (*ptr && count < 5)) 450 { 451 WARN("Byte code buffer ends unexpectedly, count %u.\n", count); 452 return NULL; 453 } 454 455 if (*ptr) 456 { 457 if (*ptr != 1) 458 { 459 FIXME("Unknown relative addressing flag, word %#x.\n", *ptr); 460 return NULL; 461 } 462 ptr = parse_pres_reg(ptr + 1, &opr->index_reg); 463 if (!ptr) 464 return NULL; 465 } 466 else 467 { 468 opr->index_reg.table = PRES_REGTAB_COUNT; 469 ++ptr; 470 } 471 472 ptr = parse_pres_reg(ptr, &opr->reg); 473 474 if (opr->reg.table == PRES_REGTAB_OBCONST) 475 opr->reg.offset /= 4; 476 return ptr; 477 } 478 479 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins) 480 { 481 unsigned int ins_code, ins_raw; 482 unsigned int input_count; 483 unsigned int i; 484 485 if (count < 2) 486 { 487 WARN("Byte code buffer ends unexpectedly.\n"); 488 return NULL; 489 } 490 491 ins_raw = *ptr++; 492 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT; 493 ins->component_count = ins_raw & PRES_NCOMP_MASK; 494 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG); 495 496 if (ins->component_count < 1 || ins->component_count > 4) 497 { 498 FIXME("Unsupported number of components %u.\n", ins->component_count); 499 return NULL; 500 } 501 input_count = *ptr++; 502 count -= 2; 503 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i) 504 if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count) 505 break; 506 if (i == ARRAY_SIZE(pres_op_info)) 507 { 508 FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw); 509 return NULL; 510 } 511 ins->op = i; 512 if (input_count > ARRAY_SIZE(ins->inputs)) 513 { 514 FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count, 515 pres_op_info[i].mnem); 516 return NULL; 517 } 518 for (i = 0; i < input_count; ++i) 519 { 520 unsigned int *p; 521 522 p = parse_pres_arg(ptr, count, &ins->inputs[i]); 523 if (!p) 524 return NULL; 525 count -= p - ptr; 526 ptr = p; 527 } 528 ptr = parse_pres_arg(ptr, count, &ins->output); 529 if (ins->output.index_reg.table != PRES_REGTAB_COUNT) 530 { 531 FIXME("Relative addressing in output register not supported.\n"); 532 return NULL; 533 } 534 if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset 535 + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1)) 536 != get_reg_offset(ins->output.reg.table, ins->output.reg.offset)) 537 { 538 FIXME("Instructions outputting multiple registers are not supported.\n"); 539 return NULL; 540 } 541 return ptr; 542 } 543 544 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc, 545 WORD *constantinfo_reserved) 546 { 547 const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc); 548 549 if (!constant) 550 { 551 FIXME("Could not get constant desc.\n"); 552 if (constantinfo_reserved) 553 *constantinfo_reserved = 0; 554 return D3DERR_INVALIDCALL; 555 } 556 *desc = constant->desc; 557 if (constantinfo_reserved) 558 *constantinfo_reserved = constant->constantinfo_reserved; 559 return D3D_OK; 560 } 561 562 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set, 563 struct const_upload_info *info) 564 { 565 struct d3dx_parameter *param = const_set->param; 566 unsigned int table = const_set->table; 567 568 info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS) 569 || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS); 570 if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS) 571 { 572 info->major = param->columns; 573 info->minor = param->rows; 574 } 575 else 576 { 577 info->major = param->rows; 578 info->minor = param->columns; 579 } 580 581 if (get_reg_components(table) == 1) 582 { 583 unsigned int const_length = get_offset_reg(table, const_set->register_count); 584 585 info->major_stride = info->minor; 586 info->major_count = const_length / info->major_stride; 587 info->minor_remainder = const_length % info->major_stride; 588 } 589 else 590 { 591 info->major_stride = get_reg_components(table); 592 info->major_count = const_set->register_count; 593 info->minor_remainder = 0; 594 } 595 info->count = info->major_count * info->minor + info->minor_remainder; 596 } 597 598 #define INITIAL_CONST_SET_SIZE 16 599 600 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set) 601 { 602 if (const_tab->const_set_count >= const_tab->const_set_size) 603 { 604 unsigned int new_size; 605 struct d3dx_const_param_eval_output *new_alloc; 606 607 if (!const_tab->const_set_size) 608 { 609 new_size = INITIAL_CONST_SET_SIZE; 610 new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size); 611 if (!new_alloc) 612 { 613 ERR("Out of memory.\n"); 614 return E_OUTOFMEMORY; 615 } 616 } 617 else 618 { 619 new_size = const_tab->const_set_size * 2; 620 new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set, 621 sizeof(*const_tab->const_set) * new_size); 622 if (!new_alloc) 623 { 624 ERR("Out of memory.\n"); 625 return E_OUTOFMEMORY; 626 } 627 } 628 const_tab->const_set = new_alloc; 629 const_tab->const_set_size = new_size; 630 } 631 const_tab->const_set[const_tab->const_set_count++] = *set; 632 return D3D_OK; 633 } 634 635 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab, 636 struct d3dx_preshader *pres) 637 { 638 unsigned int i; 639 struct d3dx_const_param_eval_output const_set = {NULL}; 640 641 for (i = 0; i < pres->ins_count; ++i) 642 { 643 const struct d3dx_pres_ins *ins = &pres->ins[i]; 644 const struct d3dx_pres_reg *reg = &ins->output.reg; 645 646 if (reg->table == PRES_REGTAB_TEMP) 647 continue; 648 649 const_set.register_index = get_reg_offset(reg->table, reg->offset); 650 const_set.register_count = 1; 651 const_set.table = reg->table; 652 const_set.constant_class = D3DXPC_FORCE_DWORD; 653 const_set.element_count = 1; 654 append_const_set(const_tab, &const_set); 655 } 656 } 657 658 static int compare_const_set(const void *a, const void *b) 659 { 660 const struct d3dx_const_param_eval_output *r1 = a; 661 const struct d3dx_const_param_eval_output *r2 = b; 662 663 if (r1->table != r2->table) 664 return r1->table - r2->table; 665 return r1->register_index - r2->register_index; 666 } 667 668 static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab, 669 struct d3dx_parameter *param, unsigned int index) 670 { 671 unsigned int i, start_index = index; 672 DWORD *current_data; 673 enum pres_reg_tables current_table; 674 unsigned int current_start_offset, element_count; 675 struct d3dx_const_param_eval_output *first_const; 676 677 if (!const_tab->const_set_count) 678 return D3D_OK; 679 680 while (index < const_tab->const_set_count - 1) 681 { 682 first_const = &const_tab->const_set[index]; 683 current_data = first_const->param->data; 684 current_table = first_const->table; 685 current_start_offset = get_offset_reg(current_table, first_const->register_index); 686 element_count = 0; 687 for (i = index; i < const_tab->const_set_count; ++i) 688 { 689 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i]; 690 unsigned int count = get_offset_reg(const_set->table, 691 const_set->register_count * const_set->element_count); 692 unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index); 693 694 if (!(const_set->table == current_table && current_start_offset == start_offset 695 && const_set->direct_copy == first_const->direct_copy 696 && current_data == const_set->param->data 697 && (const_set->direct_copy || (first_const->param->type == const_set->param->type 698 && first_const->param->class == const_set->param->class 699 && first_const->param->columns == const_set->param->columns 700 && first_const->param->rows == const_set->param->rows 701 && first_const->register_count == const_set->register_count 702 && (i == const_tab->const_set_count - 1 703 || first_const->param->element_count == const_set->param->element_count))))) 704 break; 705 706 current_start_offset += count; 707 current_data += const_set->direct_copy ? count : const_set->param->rows 708 * const_set->param->columns * const_set->element_count; 709 element_count += const_set->element_count; 710 } 711 712 if (i > index + 1) 713 { 714 TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index, 715 debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy); 716 717 first_const->element_count = element_count; 718 if (first_const->direct_copy) 719 { 720 first_const->element_count = 1; 721 if (index == start_index 722 && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT)) 723 { 724 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 725 return D3DERR_INVALIDCALL; 726 first_const->param = param; 727 } 728 first_const->register_count = get_reg_offset(current_table, current_start_offset) 729 - first_const->register_index; 730 } 731 memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i], 732 sizeof(*const_tab->const_set) * (const_tab->const_set_count - i)); 733 const_tab->const_set_count -= i - index - 1; 734 } 735 else 736 { 737 TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n", 738 const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy); 739 } 740 index = i; 741 } 742 return D3D_OK; 743 } 744 745 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab, 746 D3DXHANDLE hc, struct d3dx_parameter *param) 747 { 748 D3DXCONSTANT_DESC desc; 749 unsigned int const_count, param_count, i; 750 BOOL get_element; 751 struct d3dx_const_param_eval_output const_set; 752 struct const_upload_info info; 753 enum pres_value_type table_type; 754 HRESULT hr; 755 756 if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL))) 757 return D3DERR_INVALIDCALL; 758 759 if (param->element_count) 760 { 761 param_count = param->element_count; 762 const_count = desc.Elements; 763 get_element = TRUE; 764 } 765 else 766 { 767 if (desc.Elements > 1) 768 { 769 FIXME("Unexpected number of constant elements %u.\n", desc.Elements); 770 return D3DERR_INVALIDCALL; 771 } 772 param_count = param->member_count; 773 const_count = desc.StructMembers; 774 get_element = FALSE; 775 } 776 if (const_count != param_count) 777 { 778 FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n", 779 param_count, const_count); 780 return D3DERR_INVALIDCALL; 781 } 782 if (const_count) 783 { 784 HRESULT ret = D3D_OK; 785 D3DXHANDLE hc_element; 786 unsigned int index = const_tab->const_set_count; 787 788 for (i = 0; i < const_count; ++i) 789 { 790 if (get_element) 791 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i); 792 else 793 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i); 794 if (!hc_element) 795 { 796 FIXME("Could not get constant.\n"); 797 hr = D3DERR_INVALIDCALL; 798 } 799 else 800 { 801 hr = init_set_constants_param(const_tab, ctab, hc_element, ¶m->members[i]); 802 } 803 if (FAILED(hr)) 804 ret = hr; 805 } 806 if (FAILED(ret)) 807 return ret; 808 return merge_const_set_entries(const_tab, param, index); 809 } 810 811 TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n", 812 debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes); 813 TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n", 814 debugstr_a(param->name), param->rows, param->columns, param->class, 815 param->flags, param->bytes); 816 817 const_set.element_count = 1; 818 const_set.param = param; 819 const_set.constant_class = desc.Class; 820 if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table)) 821 { 822 FIXME("Unknown register set %u.\n", desc.RegisterSet); 823 return D3DERR_INVALIDCALL; 824 } 825 const_set.register_index = desc.RegisterIndex; 826 const_set.table = const_tab->regset2table[desc.RegisterSet]; 827 if (const_set.table >= PRES_REGTAB_COUNT) 828 { 829 ERR("Unexpected register set %u.\n", desc.RegisterSet); 830 return D3DERR_INVALIDCALL; 831 } 832 assert(table_info[const_set.table].component_size == sizeof(unsigned int)); 833 assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int)); 834 const_set.register_count = desc.RegisterCount; 835 table_type = table_info[const_set.table].type; 836 get_const_upload_info(&const_set, &info); 837 if (!info.count) 838 { 839 TRACE("%s has zero count, skipping.\n", debugstr_a(param->name)); 840 return D3D_OK; 841 } 842 843 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 844 return D3DERR_INVALIDCALL; 845 846 const_set.direct_copy = table_type_from_param_type(param->type) == table_type 847 && !info.transpose && info.minor == info.major_stride 848 && info.count == get_offset_reg(const_set.table, const_set.register_count) 849 && info.count * sizeof(unsigned int) <= param->bytes; 850 if (info.minor_remainder && !const_set.direct_copy && !info.transpose) 851 FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n", 852 debugstr_a(param->name)); 853 854 if (info.major_count > info.major 855 || (info.major_count == info.major && info.minor_remainder)) 856 { 857 WARN("Constant dimensions exceed parameter size.\n"); 858 return D3DERR_INVALIDCALL; 859 } 860 861 if (FAILED(hr = append_const_set(const_tab, &const_set))) 862 return hr; 863 864 return D3D_OK; 865 } 866 867 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out, 868 struct d3dx9_base_effect *base, const char **skip_constants, 869 unsigned int skip_constants_count, struct d3dx_preshader *pres) 870 { 871 ID3DXConstantTable *ctab; 872 D3DXCONSTANT_DESC *cdesc; 873 struct d3dx_parameter **inputs_param; 874 D3DXCONSTANTTABLE_DESC desc; 875 HRESULT hr; 876 D3DXHANDLE hc; 877 unsigned int i, j; 878 879 hr = D3DXGetShaderConstantTable(byte_code, &ctab); 880 if (FAILED(hr) || !ctab) 881 { 882 TRACE("Could not get CTAB data, hr %#x.\n", hr); 883 /* returning OK, shaders and preshaders without CTAB are valid */ 884 return D3D_OK; 885 } 886 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc))) 887 { 888 FIXME("Could not get CTAB desc, hr %#x.\n", hr); 889 goto cleanup; 890 } 891 892 out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants); 893 out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants); 894 if (!cdesc || !inputs_param) 895 { 896 hr = E_OUTOFMEMORY; 897 goto cleanup; 898 } 899 900 for (i = 0; i < desc.Constants; ++i) 901 { 902 unsigned int index = out->input_count; 903 WORD constantinfo_reserved; 904 905 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i); 906 if (!hc) 907 { 908 FIXME("Null constant handle.\n"); 909 goto cleanup; 910 } 911 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved))) 912 goto cleanup; 913 inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name); 914 if (!inputs_param[index]) 915 { 916 WARN("Could not find parameter %s in effect.\n", cdesc[index].Name); 917 continue; 918 } 919 if (cdesc[index].Class == D3DXPC_OBJECT) 920 { 921 TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]); 922 if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT 923 || !is_param_type_sampler(inputs_param[index]->type)) 924 { 925 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name)); 926 hr = D3DERR_INVALIDCALL; 927 goto cleanup; 928 } 929 if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount) 930 { 931 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name)); 932 hr = D3DERR_INVALIDCALL; 933 goto cleanup; 934 } 935 } 936 if (!is_top_level_parameter(inputs_param[index])) 937 { 938 WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name)); 939 hr = E_FAIL; 940 goto cleanup; 941 } 942 943 for (j = 0; j < skip_constants_count; ++j) 944 { 945 if (!strcmp(cdesc[index].Name, skip_constants[j])) 946 { 947 if (!constantinfo_reserved) 948 { 949 WARN("skip_constants parameter %s is not register bound.\n", 950 cdesc[index].Name); 951 hr = D3DERR_INVALIDCALL; 952 goto cleanup; 953 } 954 TRACE("Skipping constant %s.\n", cdesc[index].Name); 955 break; 956 } 957 } 958 if (j < skip_constants_count) 959 continue; 960 ++out->input_count; 961 if (inputs_param[index]->class == D3DXPC_OBJECT) 962 continue; 963 if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index]))) 964 goto cleanup; 965 } 966 if (pres) 967 append_pres_const_sets_for_shader_input(out, pres); 968 if (out->const_set_count) 969 { 970 struct d3dx_const_param_eval_output *new_alloc; 971 972 qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set); 973 974 i = 0; 975 while (i < out->const_set_count - 1) 976 { 977 if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD 978 && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD 979 && out->const_set[i].table == out->const_set[i + 1].table 980 && out->const_set[i].register_index + out->const_set[i].register_count 981 >= out->const_set[i + 1].register_index) 982 { 983 assert(out->const_set[i].register_index + out->const_set[i].register_count 984 <= out->const_set[i + 1].register_index + 1); 985 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1 986 - out->const_set[i].register_index; 987 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i]) 988 * (out->const_set_count - i - 2)); 989 --out->const_set_count; 990 } 991 else 992 { 993 ++i; 994 } 995 } 996 997 new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set, 998 sizeof(*out->const_set) * out->const_set_count); 999 if (new_alloc) 1000 { 1001 out->const_set = new_alloc; 1002 out->const_set_size = out->const_set_count; 1003 } 1004 else 1005 { 1006 WARN("Out of memory.\n"); 1007 } 1008 } 1009 cleanup: 1010 ID3DXConstantTable_Release(ctab); 1011 return hr; 1012 } 1013 1014 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register) 1015 { 1016 if (table < PRES_REGTAB_COUNT) 1017 table_sizes[table] = max(table_sizes[table], max_register + 1); 1018 } 1019 1020 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab) 1021 { 1022 unsigned int i, table, max_register; 1023 1024 for (i = 0; i < ctab->input_count; ++i) 1025 { 1026 if (!ctab->inputs[i].RegisterCount) 1027 continue; 1028 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1; 1029 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1030 update_table_size(table_sizes, table, max_register); 1031 } 1032 } 1033 1034 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count) 1035 { 1036 static const char *xyzw_str = "xyzw"; 1037 unsigned int i, table; 1038 1039 table = arg->reg.table; 1040 if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT) 1041 { 1042 TRACE("("); 1043 for (i = 0; i < component_count; ++i) 1044 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e", 1045 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]); 1046 TRACE(")"); 1047 } 1048 else 1049 { 1050 if (arg->index_reg.table == PRES_REGTAB_COUNT) 1051 { 1052 TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset)); 1053 } 1054 else 1055 { 1056 unsigned int index_reg; 1057 1058 index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset); 1059 TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset), 1060 table_symbol[arg->index_reg.table], index_reg, 1061 xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]); 1062 } 1063 for (i = 0; i < component_count; ++i) 1064 TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]); 1065 } 1066 } 1067 1068 static void dump_registers(struct d3dx_const_tab *ctab) 1069 { 1070 unsigned int table, i; 1071 1072 for (i = 0; i < ctab->input_count; ++i) 1073 { 1074 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1075 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)", 1076 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount); 1077 } 1078 } 1079 1080 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins) 1081 { 1082 unsigned int i; 1083 1084 TRACE("%s ", pres_op_info[ins->op].mnem); 1085 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count); 1086 for (i = 0; i < pres_op_info[ins->op].input_count; ++i) 1087 { 1088 TRACE(", "); 1089 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count); 1090 } 1091 TRACE("\n"); 1092 } 1093 1094 static void dump_preshader(struct d3dx_preshader *pres) 1095 { 1096 unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4; 1097 const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED]; 1098 1099 if (immediate_count) 1100 TRACE("// Immediates:\n"); 1101 for (i = 0; i < immediate_count; ++i) 1102 { 1103 if (!(i % 4)) 1104 TRACE("// "); 1105 TRACE("%.8e", immediates[i]); 1106 if (i % 4 == 3) 1107 TRACE("\n"); 1108 else 1109 TRACE(", "); 1110 } 1111 TRACE("// Preshader registers:\n"); 1112 dump_registers(&pres->inputs); 1113 TRACE("preshader\n"); 1114 for (i = 0; i < pres->ins_count; ++i) 1115 dump_ins(&pres->regs, &pres->ins[i]); 1116 } 1117 1118 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base) 1119 { 1120 unsigned int *p; 1121 unsigned int i, j, const_count; 1122 double *dconst; 1123 HRESULT hr; 1124 unsigned int saved_word; 1125 unsigned int section_size; 1126 1127 TRACE("Preshader version %#x.\n", *ptr & 0xffff); 1128 1129 if (!count) 1130 { 1131 WARN("Unexpected end of byte code buffer.\n"); 1132 return D3DXERR_INVALIDDATA; 1133 } 1134 1135 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, §ion_size); 1136 if (p) 1137 { 1138 const_count = *p++; 1139 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int))) 1140 { 1141 WARN("Byte code buffer ends unexpectedly.\n"); 1142 return D3DXERR_INVALIDDATA; 1143 } 1144 dconst = (double *)p; 1145 } 1146 else 1147 { 1148 const_count = 0; 1149 dconst = NULL; 1150 } 1151 TRACE("%u double constants.\n", const_count); 1152 1153 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, §ion_size); 1154 if (!p) 1155 { 1156 WARN("Could not find preshader code.\n"); 1157 return D3D_OK; 1158 } 1159 pres->ins_count = *p++; 1160 --section_size; 1161 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins)) 1162 { 1163 WARN("Invalid instruction count %u.\n", pres->ins_count); 1164 return D3DXERR_INVALIDDATA; 1165 } 1166 TRACE("%u instructions.\n", pres->ins_count); 1167 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count); 1168 if (!pres->ins) 1169 return E_OUTOFMEMORY; 1170 for (i = 0; i < pres->ins_count; ++i) 1171 { 1172 unsigned int *ptr_next; 1173 1174 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]); 1175 if (!ptr_next) 1176 return D3DXERR_INVALIDDATA; 1177 section_size -= ptr_next - p; 1178 p = ptr_next; 1179 } 1180 1181 pres->inputs.regset2table = pres_regset2table; 1182 1183 saved_word = *ptr; 1184 *ptr = 0xfffe0000; 1185 hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL); 1186 *ptr = saved_word; 1187 if (FAILED(hr)) 1188 return hr; 1189 1190 if (const_count % get_reg_components(PRES_REGTAB_IMMED)) 1191 { 1192 FIXME("const_count %u is not a multiple of %u.\n", const_count, 1193 get_reg_components(PRES_REGTAB_IMMED)); 1194 return D3DXERR_INVALIDDATA; 1195 } 1196 pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count); 1197 1198 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs); 1199 for (i = 0; i < pres->ins_count; ++i) 1200 { 1201 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j) 1202 { 1203 enum pres_reg_tables table; 1204 unsigned int reg_idx; 1205 1206 if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT) 1207 { 1208 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0 1209 : pres->ins[i].component_count - 1; 1210 1211 table = pres->ins[i].inputs[j].reg.table; 1212 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset 1213 + last_component_index); 1214 } 1215 else 1216 { 1217 table = pres->ins[i].inputs[j].index_reg.table; 1218 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset); 1219 } 1220 if (reg_idx >= pres->regs.table_sizes[table]) 1221 { 1222 /* Native accepts these broken preshaders. */ 1223 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n", 1224 i, j, table, reg_idx); 1225 return D3DXERR_INVALIDDATA; 1226 } 1227 } 1228 update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table, 1229 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset)); 1230 } 1231 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED))) 1232 return E_OUTOFMEMORY; 1233 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count); 1234 1235 return D3D_OK; 1236 } 1237 1238 HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size, 1239 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter, 1240 const char **skip_constants, unsigned int skip_constants_count) 1241 { 1242 struct d3dx_param_eval *peval; 1243 unsigned int *ptr, *shader_ptr = NULL; 1244 unsigned int i; 1245 BOOL shader; 1246 unsigned int count, pres_size; 1247 HRESULT ret; 1248 1249 TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n", 1250 base_effect, byte_code, byte_code_size, type, peval_out); 1251 1252 count = byte_code_size / sizeof(unsigned int); 1253 if (!byte_code || !count) 1254 { 1255 *peval_out = NULL; 1256 return D3D_OK; 1257 } 1258 1259 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval)); 1260 if (!peval) 1261 { 1262 ret = E_OUTOFMEMORY; 1263 goto err_out; 1264 } 1265 peval->version_counter = version_counter; 1266 1267 peval->param_type = type; 1268 switch (type) 1269 { 1270 case D3DXPT_VERTEXSHADER: 1271 case D3DXPT_PIXELSHADER: 1272 shader = TRUE; 1273 break; 1274 default: 1275 shader = FALSE; 1276 break; 1277 } 1278 peval->shader_inputs.regset2table = shad_regset2table; 1279 1280 ptr = (unsigned int *)byte_code; 1281 if (shader) 1282 { 1283 if ((*ptr & 0xfffe0000) != 0xfffe0000) 1284 { 1285 FIXME("Invalid shader signature %#x.\n", *ptr); 1286 ret = D3DXERR_INVALIDDATA; 1287 goto err_out; 1288 } 1289 TRACE("Shader version %#x.\n", *ptr & 0xffff); 1290 shader_ptr = ptr; 1291 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size); 1292 if (!ptr) 1293 TRACE("No preshader found.\n"); 1294 } 1295 else 1296 { 1297 pres_size = count; 1298 } 1299 1300 if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect))) 1301 { 1302 FIXME("Failed parsing preshader, byte code for analysis follows.\n"); 1303 dump_bytecode(byte_code, byte_code_size); 1304 goto err_out; 1305 } 1306 1307 if (shader) 1308 { 1309 if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect, 1310 skip_constants, skip_constants_count, &peval->pres))) 1311 { 1312 TRACE("Could not get shader constant table, hr %#x.\n", ret); 1313 goto err_out; 1314 } 1315 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs); 1316 } 1317 1318 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i) 1319 { 1320 if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i))) 1321 goto err_out; 1322 } 1323 1324 if (TRACE_ON(d3dx)) 1325 { 1326 dump_bytecode(byte_code, byte_code_size); 1327 dump_preshader(&peval->pres); 1328 if (shader) 1329 { 1330 TRACE("// Shader registers:\n"); 1331 dump_registers(&peval->shader_inputs); 1332 } 1333 } 1334 *peval_out = peval; 1335 TRACE("Created parameter evaluator %p.\n", *peval_out); 1336 return D3D_OK; 1337 1338 err_out: 1339 WARN("Error creating parameter evaluator.\n"); 1340 if (TRACE_ON(d3dx)) 1341 dump_bytecode(byte_code, byte_code_size); 1342 1343 d3dx_free_param_eval(peval); 1344 *peval_out = NULL; 1345 return ret; 1346 } 1347 1348 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab) 1349 { 1350 HeapFree(GetProcessHeap(), 0, ctab->inputs); 1351 HeapFree(GetProcessHeap(), 0, ctab->inputs_param); 1352 HeapFree(GetProcessHeap(), 0, ctab->const_set); 1353 } 1354 1355 static void d3dx_free_preshader(struct d3dx_preshader *pres) 1356 { 1357 HeapFree(GetProcessHeap(), 0, pres->ins); 1358 1359 regstore_free_tables(&pres->regs); 1360 d3dx_free_const_tab(&pres->inputs); 1361 } 1362 1363 void d3dx_free_param_eval(struct d3dx_param_eval *peval) 1364 { 1365 TRACE("peval %p.\n", peval); 1366 1367 if (!peval) 1368 return; 1369 1370 d3dx_free_preshader(&peval->pres); 1371 d3dx_free_const_tab(&peval->shader_inputs); 1372 HeapFree(GetProcessHeap(), 0, peval); 1373 } 1374 1375 static void pres_int_from_float(void *out, const void *in, unsigned int count) 1376 { 1377 unsigned int i; 1378 const float *in_float = in; 1379 int *out_int = out; 1380 1381 for (i = 0; i < count; ++i) 1382 out_int[i] = in_float[i]; 1383 } 1384 1385 static void pres_bool_from_value(void *out, const void *in, unsigned int count) 1386 { 1387 unsigned int i; 1388 const DWORD *in_dword = in; 1389 BOOL *out_bool = out; 1390 1391 for (i = 0; i < count; ++i) 1392 out_bool[i] = !!in_dword[i]; 1393 } 1394 1395 static void pres_float_from_int(void *out, const void *in, unsigned int count) 1396 { 1397 unsigned int i; 1398 const int *in_int = in; 1399 float *out_float = out; 1400 1401 for (i = 0; i < count; ++i) 1402 out_float[i] = in_int[i]; 1403 } 1404 1405 static void pres_float_from_bool(void *out, const void *in, unsigned int count) 1406 { 1407 unsigned int i; 1408 const BOOL *in_bool = in; 1409 float *out_float = out; 1410 1411 for (i = 0; i < count; ++i) 1412 out_float[i] = !!in_bool[i]; 1413 } 1414 1415 static void pres_int_from_bool(void *out, const void *in, unsigned int count) 1416 { 1417 unsigned int i; 1418 const float *in_bool = in; 1419 int *out_int = out; 1420 1421 for (i = 0; i < count; ++i) 1422 out_int[i] = !!in_bool[i]; 1423 } 1424 1425 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table, 1426 unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type) 1427 { 1428 typedef void (*conv_func)(void *out, const void *in, unsigned int count); 1429 static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] = 1430 { 1431 {NULL, NULL, pres_int_from_float, pres_bool_from_value}, 1432 {NULL, NULL, NULL, NULL}, 1433 {pres_float_from_int, NULL, NULL, pres_bool_from_value}, 1434 {pres_float_from_bool, NULL, pres_int_from_bool, NULL} 1435 }; 1436 enum pres_value_type table_type = table_info[table].type; 1437 1438 if (param_type == table_type) 1439 { 1440 regstore_set_values(rs, table, in, offset, count); 1441 return; 1442 } 1443 1444 set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count); 1445 } 1446 1447 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1448 D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr, 1449 unsigned int start, unsigned int count) 1450 { 1451 if (type == D3DXPT_VERTEXSHADER) 1452 { 1453 switch(table) 1454 { 1455 case PRES_REGTAB_OCONST: 1456 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count); 1457 case PRES_REGTAB_OICONST: 1458 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count); 1459 case PRES_REGTAB_OBCONST: 1460 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count); 1461 default: 1462 FIXME("Unexpected register table %u.\n", table); 1463 return D3DERR_INVALIDCALL; 1464 } 1465 } 1466 else if (type == D3DXPT_PIXELSHADER) 1467 { 1468 switch(table) 1469 { 1470 case PRES_REGTAB_OCONST: 1471 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count); 1472 case PRES_REGTAB_OICONST: 1473 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count); 1474 case PRES_REGTAB_OBCONST: 1475 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count); 1476 default: 1477 FIXME("Unexpected register table %u.\n", table); 1478 return D3DERR_INVALIDCALL; 1479 } 1480 } 1481 else 1482 { 1483 FIXME("Unexpected parameter type %u.\n", type); 1484 return D3DERR_INVALIDCALL; 1485 } 1486 } 1487 1488 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab, 1489 ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1490 D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty) 1491 { 1492 unsigned int const_idx; 1493 unsigned int current_start = 0, current_count = 0; 1494 enum pres_reg_tables current_table = PRES_REGTAB_COUNT; 1495 BOOL update_device = manager || device; 1496 HRESULT hr, result = D3D_OK; 1497 ULONG64 update_version = const_tab->update_version; 1498 1499 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1500 { 1501 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1502 enum pres_reg_tables table = const_set->table; 1503 struct d3dx_parameter *param = const_set->param; 1504 unsigned int element, i, j, start_offset; 1505 struct const_upload_info info; 1506 unsigned int *data; 1507 enum pres_value_type param_type; 1508 1509 if (!(param && is_param_dirty(param, update_version))) 1510 continue; 1511 1512 data = param->data; 1513 start_offset = get_offset_reg(table, const_set->register_index); 1514 if (const_set->direct_copy) 1515 { 1516 regstore_set_values(rs, table, data, start_offset, 1517 get_offset_reg(table, const_set->register_count)); 1518 continue; 1519 } 1520 param_type = table_type_from_param_type(param->type); 1521 if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR) 1522 { 1523 unsigned int count = max(param->rows, param->columns); 1524 1525 if (count >= get_reg_components(table)) 1526 { 1527 regstore_set_data(rs, table, start_offset, data, 1528 count * const_set->element_count, param_type); 1529 } 1530 else 1531 { 1532 for (element = 0; element < const_set->element_count; ++element) 1533 regstore_set_data(rs, table, start_offset + get_offset_reg(table, element), 1534 &data[element * count], count, param_type); 1535 } 1536 continue; 1537 } 1538 get_const_upload_info(const_set, &info); 1539 for (element = 0; element < const_set->element_count; ++element) 1540 { 1541 unsigned int *out = (unsigned int *)rs->tables[table] + start_offset; 1542 1543 /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer. 1544 * All the supported types of parameters and table values have the same size. */ 1545 if (info.transpose) 1546 { 1547 for (i = 0; i < info.major_count; ++i) 1548 for (j = 0; j < info.minor; ++j) 1549 out[i * info.major_stride + j] = data[i + j * info.major]; 1550 1551 for (j = 0; j < info.minor_remainder; ++j) 1552 out[i * info.major_stride + j] = data[i + j * info.major]; 1553 } 1554 else 1555 { 1556 for (i = 0; i < info.major_count; ++i) 1557 for (j = 0; j < info.minor; ++j) 1558 out[i * info.major_stride + j] = data[i * info.minor + j]; 1559 } 1560 start_offset += get_offset_reg(table, const_set->register_count); 1561 data += param->rows * param->columns; 1562 } 1563 start_offset = get_offset_reg(table, const_set->register_index); 1564 if (table_info[table].type != param_type) 1565 regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset, 1566 get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type); 1567 } 1568 const_tab->update_version = new_update_version; 1569 if (!update_device) 1570 return D3D_OK; 1571 1572 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1573 { 1574 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1575 1576 if (device_update_all || (const_set->param 1577 ? is_param_dirty(const_set->param, update_version) : pres_dirty)) 1578 { 1579 enum pres_reg_tables table = const_set->table; 1580 1581 if (table == current_table && current_start + current_count == const_set->register_index) 1582 { 1583 current_count += const_set->register_count * const_set->element_count; 1584 } 1585 else 1586 { 1587 if (current_count) 1588 { 1589 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1590 (DWORD *)rs->tables[current_table] 1591 + get_offset_reg(current_table, current_start), current_start, current_count))) 1592 result = hr; 1593 } 1594 current_table = table; 1595 current_start = const_set->register_index; 1596 current_count = const_set->register_count * const_set->element_count; 1597 } 1598 } 1599 } 1600 if (current_count) 1601 { 1602 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1603 (DWORD *)rs->tables[current_table] 1604 + get_offset_reg(current_table, current_start), current_start, current_count))) 1605 result = hr; 1606 } 1607 return result; 1608 } 1609 1610 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset) 1611 { 1612 return regstore_get_double(rs, table, offset); 1613 } 1614 1615 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp) 1616 { 1617 unsigned int offset, base_index, reg_index, table; 1618 1619 table = opr->reg.table; 1620 1621 if (opr->index_reg.table == PRES_REGTAB_COUNT) 1622 base_index = 0; 1623 else 1624 base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset)); 1625 1626 offset = get_offset_reg(table, base_index) + opr->reg.offset + comp; 1627 reg_index = get_reg_offset(table, offset); 1628 1629 if (reg_index >= rs->table_sizes[table]) 1630 { 1631 unsigned int wrap_size; 1632 1633 if (table == PRES_REGTAB_CONST) 1634 { 1635 /* As it can be guessed from tests, offset into floating constant table is wrapped 1636 * to the nearest power of 2 and not to the actual table size. */ 1637 for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1) 1638 ; 1639 } 1640 else 1641 { 1642 wrap_size = rs->table_sizes[table]; 1643 } 1644 WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n", 1645 reg_index, table, wrap_size, rs->table_sizes[table]); 1646 reg_index %= wrap_size; 1647 1648 if (reg_index >= rs->table_sizes[table]) 1649 return 0.0; 1650 1651 offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table); 1652 } 1653 1654 return exec_get_reg_value(rs, table, offset); 1655 } 1656 1657 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg, 1658 unsigned int comp, double res) 1659 { 1660 regstore_set_double(rs, reg->table, reg->offset + comp, res); 1661 } 1662 1663 #define ARGS_ARRAY_SIZE 8 1664 static HRESULT execute_preshader(struct d3dx_preshader *pres) 1665 { 1666 unsigned int i, j, k; 1667 double args[ARGS_ARRAY_SIZE]; 1668 double res; 1669 1670 for (i = 0; i < pres->ins_count; ++i) 1671 { 1672 const struct d3dx_pres_ins *ins; 1673 const struct op_info *oi; 1674 1675 ins = &pres->ins[i]; 1676 oi = &pres_op_info[ins->op]; 1677 if (oi->func_all_comps) 1678 { 1679 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE) 1680 { 1681 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count); 1682 return E_FAIL; 1683 } 1684 for (k = 0; k < oi->input_count; ++k) 1685 for (j = 0; j < ins->component_count; ++j) 1686 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k], 1687 ins->scalar_op && !k ? 0 : j); 1688 res = oi->func(args, ins->component_count); 1689 1690 /* only 'dot' instruction currently falls here */ 1691 exec_set_arg(&pres->regs, &ins->output.reg, 0, res); 1692 } 1693 else 1694 { 1695 for (j = 0; j < ins->component_count; ++j) 1696 { 1697 for (k = 0; k < oi->input_count; ++k) 1698 args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j); 1699 res = oi->func(args, ins->component_count); 1700 exec_set_arg(&pres->regs, &ins->output.reg, j, res); 1701 } 1702 } 1703 } 1704 return D3D_OK; 1705 } 1706 1707 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version) 1708 { 1709 unsigned int i; 1710 1711 if (update_version == ULONG64_MAX) 1712 update_version = ctab->update_version; 1713 for (i = 0; i < ctab->input_count; ++i) 1714 { 1715 if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]), 1716 update_version)) 1717 return TRUE; 1718 } 1719 return FALSE; 1720 } 1721 1722 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version) 1723 { 1724 return is_const_tab_input_dirty(&peval->pres.inputs, update_version) 1725 || is_const_tab_input_dirty(&peval->shader_inputs, update_version); 1726 } 1727 1728 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param, 1729 void *param_value) 1730 { 1731 HRESULT hr; 1732 unsigned int i; 1733 unsigned int elements, elements_param, elements_table; 1734 float *oc; 1735 1736 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value); 1737 1738 if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX)) 1739 { 1740 set_constants(&peval->pres.regs, &peval->pres.inputs, 1741 next_update_version(peval->version_counter), 1742 NULL, NULL, peval->param_type, FALSE, FALSE); 1743 1744 if (FAILED(hr = execute_preshader(&peval->pres))) 1745 return hr; 1746 } 1747 1748 elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]); 1749 elements_param = param->bytes / sizeof(unsigned int); 1750 elements = min(elements_table, elements_param); 1751 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST]; 1752 for (i = 0; i < elements; ++i) 1753 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT); 1754 return D3D_OK; 1755 } 1756 1757 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1758 struct d3dx_param_eval *peval, BOOL update_all) 1759 { 1760 HRESULT hr; 1761 struct d3dx_preshader *pres = &peval->pres; 1762 struct d3dx_regstore *rs = &pres->regs; 1763 ULONG64 new_update_version = next_update_version(peval->version_counter); 1764 BOOL pres_dirty = FALSE; 1765 1766 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type); 1767 1768 if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX)) 1769 { 1770 set_constants(rs, &pres->inputs, new_update_version, 1771 NULL, NULL, peval->param_type, FALSE, FALSE); 1772 if (FAILED(hr = execute_preshader(pres))) 1773 return hr; 1774 pres_dirty = TRUE; 1775 } 1776 1777 return set_constants(rs, &peval->shader_inputs, new_update_version, 1778 manager, device, peval->param_type, update_all, pres_dirty); 1779 } 1780