1 #ifdef __REACTOS__ 2 #include "precomp.h" 3 #else 4 /* 5 * Copyright 2016 Paul Gofman 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with this library; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 20 */ 21 22 23 #include "d3dx9_private.h" 24 25 #include <float.h> 26 #include <math.h> 27 #include <assert.h> 28 #endif /* __REACTOS__ */ 29 30 WINE_DEFAULT_DEBUG_CHANNEL(d3dx); 31 32 #ifdef __REACTOS__ 33 /* ReactOS FIXME: Insect */ 34 #define fmin min 35 #define fmax max 36 #endif 37 38 enum pres_ops 39 { 40 PRESHADER_OP_NOP, 41 PRESHADER_OP_MOV, 42 PRESHADER_OP_NEG, 43 PRESHADER_OP_RCP, 44 PRESHADER_OP_FRC, 45 PRESHADER_OP_EXP, 46 PRESHADER_OP_LOG, 47 PRESHADER_OP_RSQ, 48 PRESHADER_OP_SIN, 49 PRESHADER_OP_COS, 50 PRESHADER_OP_ASIN, 51 PRESHADER_OP_ACOS, 52 PRESHADER_OP_ATAN, 53 PRESHADER_OP_MIN, 54 PRESHADER_OP_MAX, 55 PRESHADER_OP_LT, 56 PRESHADER_OP_GE, 57 PRESHADER_OP_ADD, 58 PRESHADER_OP_MUL, 59 PRESHADER_OP_ATAN2, 60 PRESHADER_OP_DIV, 61 PRESHADER_OP_CMP, 62 PRESHADER_OP_DOT, 63 PRESHADER_OP_DOTSWIZ6, 64 PRESHADER_OP_DOTSWIZ8, 65 }; 66 67 typedef double (*pres_op_func)(double *args, int n); 68 69 static double to_signed_nan(double v) 70 { 71 static const union 72 { 73 ULONG64 ulong64_value; 74 double double_value; 75 } 76 signed_nan = 77 { 78 0xfff8000000000000 79 }; 80 81 return isnan(v) ? signed_nan.double_value : v; 82 } 83 84 static double pres_mov(double *args, int n) {return args[0];} 85 static double pres_add(double *args, int n) {return args[0] + args[1];} 86 static double pres_mul(double *args, int n) {return args[0] * args[1];} 87 static double pres_dot(double *args, int n) 88 { 89 int i; 90 double sum; 91 92 sum = 0.0; 93 for (i = 0; i < n; ++i) 94 sum += args[i] * args[i + n]; 95 return sum; 96 } 97 98 static double pres_dotswiz6(double *args, int n) 99 { 100 return pres_dot(args, 3); 101 } 102 103 static double pres_dotswiz8(double *args, int n) 104 { 105 return pres_dot(args, 4); 106 } 107 108 static double pres_neg(double *args, int n) {return -args[0];} 109 static double pres_rcp(double *args, int n) {return 1.0 / args[0];} 110 static double pres_lt(double *args, int n) {return args[0] < args[1] ? 1.0 : 0.0;} 111 static double pres_ge(double *args, int n) {return args[0] >= args[1] ? 1.0 : 0.0;} 112 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);} 113 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);} 114 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);} 115 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];} 116 static double pres_sin(double *args, int n) {return sin(args[0]);} 117 static double pres_cos(double *args, int n) {return cos(args[0]);} 118 static double pres_rsq(double *args, int n) 119 { 120 double v; 121 122 v = fabs(args[0]); 123 if (v == 0.0) 124 return INFINITY; 125 else 126 return 1.0 / sqrt(v); 127 } 128 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);} 129 static double pres_log(double *args, int n) 130 { 131 double v; 132 133 v = fabs(args[0]); 134 if (v == 0.0) 135 return 0.0; 136 else 137 return log2(v); 138 } 139 static double pres_asin(double *args, int n) {return to_signed_nan(asin(args[0]));} 140 static double pres_acos(double *args, int n) {return to_signed_nan(acos(args[0]));} 141 static double pres_atan(double *args, int n) {return atan(args[0]);} 142 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);} 143 144 /* According to the test results 'div' operation always returns 0. Compiler does not seem to ever 145 * generate it, using rcp + mul instead, so probably it is not implemented in native d3dx. */ 146 static double pres_div(double *args, int n) {return 0.0;} 147 148 #define PRES_OPCODE_MASK 0x7ff00000 149 #define PRES_OPCODE_SHIFT 20 150 #define PRES_SCALAR_FLAG 0x80000000 151 #define PRES_NCOMP_MASK 0x0000ffff 152 153 #define FOURCC_PRES 0x53455250 154 #define FOURCC_CLIT 0x54494c43 155 #define FOURCC_FXLC 0x434c5846 156 #define FOURCC_PRSI 0x49535250 157 #define PRES_SIGN 0x46580000 158 159 struct op_info 160 { 161 unsigned int opcode; 162 char mnem[16]; 163 unsigned int input_count; 164 BOOL func_all_comps; 165 pres_op_func func; 166 }; 167 168 static const struct op_info pres_op_info[] = 169 { 170 {0x000, "nop", 0, 0, NULL }, /* PRESHADER_OP_NOP */ 171 {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */ 172 {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */ 173 {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */ 174 {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */ 175 {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */ 176 {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */ 177 {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */ 178 {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */ 179 {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */ 180 {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */ 181 {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */ 182 {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */ 183 {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */ 184 {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */ 185 {0x202, "lt", 2, 0, pres_lt }, /* PRESHADER_OP_LT */ 186 {0x203, "ge", 2, 0, pres_ge }, /* PRESHADER_OP_GE */ 187 {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */ 188 {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */ 189 {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */ 190 {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */ 191 {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */ 192 {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */ 193 {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */ 194 {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */ 195 }; 196 197 enum pres_value_type 198 { 199 PRES_VT_FLOAT, 200 PRES_VT_DOUBLE, 201 PRES_VT_INT, 202 PRES_VT_BOOL, 203 PRES_VT_COUNT 204 }; 205 206 static const struct 207 { 208 unsigned int component_size; 209 enum pres_value_type type; 210 } 211 table_info[] = 212 { 213 {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */ 214 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */ 215 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */ 216 {sizeof(BOOL), PRES_VT_BOOL }, /* PRES_REGTAB_OBCONST */ 217 {sizeof(int), PRES_VT_INT, }, /* PRES_REGTAB_OICONST */ 218 /* TODO: use double precision for 64 bit */ 219 {sizeof(float), PRES_VT_FLOAT } /* PRES_REGTAB_TEMP */ 220 }; 221 222 static const char *table_symbol[] = 223 { 224 "imm", "c", "oc", "ob", "oi", "r", "(null)", 225 }; 226 227 static const enum pres_reg_tables pres_regset2table[] = 228 { 229 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 230 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 231 PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */ 232 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 233 }; 234 235 static const enum pres_reg_tables shad_regset2table[] = 236 { 237 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 238 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 239 PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */ 240 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 241 }; 242 243 struct d3dx_pres_reg 244 { 245 enum pres_reg_tables table; 246 /* offset is component index, not register index, e. g. 247 offset for component c3.y is 13 (3 * 4 + 1) */ 248 unsigned int offset; 249 }; 250 251 struct d3dx_pres_operand 252 { 253 struct d3dx_pres_reg reg; 254 struct d3dx_pres_reg index_reg; 255 }; 256 257 #define MAX_INPUTS_COUNT 8 258 259 struct d3dx_pres_ins 260 { 261 enum pres_ops op; 262 /* first input argument is scalar, 263 scalar component is propagated */ 264 BOOL scalar_op; 265 unsigned int component_count; 266 struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT]; 267 struct d3dx_pres_operand output; 268 }; 269 270 struct const_upload_info 271 { 272 BOOL transpose; 273 unsigned int major, minor; 274 unsigned int major_stride; 275 unsigned int major_count; 276 unsigned int count; 277 unsigned int minor_remainder; 278 }; 279 280 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type) 281 { 282 switch (type) 283 { 284 case D3DXPT_FLOAT: 285 return PRES_VT_FLOAT; 286 case D3DXPT_INT: 287 return PRES_VT_INT; 288 case D3DXPT_BOOL: 289 return PRES_VT_BOOL; 290 default: 291 FIXME("Unsupported type %u.\n", type); 292 return PRES_VT_COUNT; 293 } 294 } 295 296 static unsigned int get_reg_offset(unsigned int table, unsigned int offset) 297 { 298 return table == PRES_REGTAB_OBCONST ? offset : offset >> 2; 299 } 300 301 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx) 302 { 303 return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2; 304 } 305 306 static unsigned int get_reg_components(unsigned int table) 307 { 308 return get_offset_reg(table, 1); 309 } 310 311 #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8) 312 313 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table) 314 { 315 unsigned int size; 316 317 size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size; 318 if (size) 319 { 320 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size); 321 if (!rs->tables[table]) 322 return E_OUTOFMEMORY; 323 } 324 return D3D_OK; 325 } 326 327 static void regstore_free_tables(struct d3dx_regstore *rs) 328 { 329 unsigned int i; 330 331 for (i = 0; i < PRES_REGTAB_COUNT; ++i) 332 { 333 HeapFree(GetProcessHeap(), 0, rs->tables[i]); 334 } 335 } 336 337 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data, 338 unsigned int start_offset, unsigned int count) 339 { 340 BYTE *dst = rs->tables[table]; 341 const BYTE *src = data; 342 unsigned int size; 343 344 dst += start_offset * table_info[table].component_size; 345 size = count * table_info[table].component_size; 346 assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst)); 347 memcpy(dst, src, size); 348 } 349 350 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset) 351 { 352 BYTE *p; 353 354 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 355 switch (table_info[table].type) 356 { 357 case PRES_VT_FLOAT: 358 return *(float *)p; 359 case PRES_VT_DOUBLE: 360 return *(double *)p; 361 default: 362 FIXME("Unexpected preshader input from table %u.\n", table); 363 return NAN; 364 } 365 } 366 367 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v) 368 { 369 BYTE *p; 370 371 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 372 switch (table_info[table].type) 373 { 374 case PRES_VT_FLOAT : *(float *)p = v; break; 375 case PRES_VT_DOUBLE: *(double *)p = v; break; 376 case PRES_VT_INT : *(int *)p = lrint(v); break; 377 case PRES_VT_BOOL : *(BOOL *)p = !!v; break; 378 default: 379 FIXME("Bad type %u.\n", table_info[table].type); 380 break; 381 } 382 } 383 384 static void dump_bytecode(void *data, unsigned int size) 385 { 386 unsigned int *bytecode = (unsigned int *)data; 387 unsigned int i, j, n; 388 389 size /= sizeof(*bytecode); 390 i = 0; 391 while (i < size) 392 { 393 n = min(size - i, 8); 394 for (j = 0; j < n; ++j) 395 TRACE("0x%08x,", bytecode[i + j]); 396 i += n; 397 TRACE("\n"); 398 } 399 } 400 401 static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count, 402 unsigned int fourcc, unsigned int *size) 403 { 404 /* Provide at least one value in comment section on non-NULL return. */ 405 while (count > 2 && (*ptr & 0xffff) == 0xfffe) 406 { 407 unsigned int section_size; 408 409 section_size = (*ptr >> 16); 410 if (!section_size || section_size + 1 > count) 411 break; 412 if (*(ptr + 1) == fourcc) 413 { 414 *size = section_size; 415 return ptr + 2; 416 } 417 count -= section_size + 1; 418 ptr += section_size + 1; 419 } 420 return NULL; 421 } 422 423 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg) 424 { 425 static const enum pres_reg_tables reg_table[8] = 426 { 427 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT, 428 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP 429 }; 430 431 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT) 432 { 433 FIXME("Unsupported register table %#x.\n", *ptr); 434 return NULL; 435 } 436 437 reg->table = reg_table[*ptr++]; 438 reg->offset = *ptr++; 439 return ptr; 440 } 441 442 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr) 443 { 444 if (count < 3 || (*ptr && count < 5)) 445 { 446 WARN("Byte code buffer ends unexpectedly, count %u.\n", count); 447 return NULL; 448 } 449 450 if (*ptr) 451 { 452 if (*ptr != 1) 453 { 454 FIXME("Unknown relative addressing flag, word %#x.\n", *ptr); 455 return NULL; 456 } 457 ptr = parse_pres_reg(ptr + 1, &opr->index_reg); 458 if (!ptr) 459 return NULL; 460 } 461 else 462 { 463 opr->index_reg.table = PRES_REGTAB_COUNT; 464 ++ptr; 465 } 466 467 ptr = parse_pres_reg(ptr, &opr->reg); 468 469 if (opr->reg.table == PRES_REGTAB_OBCONST) 470 opr->reg.offset /= 4; 471 return ptr; 472 } 473 474 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins) 475 { 476 unsigned int ins_code, ins_raw; 477 unsigned int input_count; 478 unsigned int i; 479 480 if (count < 2) 481 { 482 WARN("Byte code buffer ends unexpectedly.\n"); 483 return NULL; 484 } 485 486 ins_raw = *ptr++; 487 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT; 488 ins->component_count = ins_raw & PRES_NCOMP_MASK; 489 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG); 490 491 if (ins->component_count < 1 || ins->component_count > 4) 492 { 493 FIXME("Unsupported number of components %u.\n", ins->component_count); 494 return NULL; 495 } 496 input_count = *ptr++; 497 count -= 2; 498 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i) 499 if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count) 500 break; 501 if (i == ARRAY_SIZE(pres_op_info)) 502 { 503 FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw); 504 return NULL; 505 } 506 ins->op = i; 507 if (input_count > ARRAY_SIZE(ins->inputs)) 508 { 509 FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count, 510 pres_op_info[i].mnem); 511 return NULL; 512 } 513 for (i = 0; i < input_count; ++i) 514 { 515 unsigned int *p; 516 517 p = parse_pres_arg(ptr, count, &ins->inputs[i]); 518 if (!p) 519 return NULL; 520 count -= p - ptr; 521 ptr = p; 522 } 523 ptr = parse_pres_arg(ptr, count, &ins->output); 524 if (ins->output.index_reg.table != PRES_REGTAB_COUNT) 525 { 526 FIXME("Relative addressing in output register not supported.\n"); 527 return NULL; 528 } 529 if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset 530 + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1)) 531 != get_reg_offset(ins->output.reg.table, ins->output.reg.offset)) 532 { 533 FIXME("Instructions outputting multiple registers are not supported.\n"); 534 return NULL; 535 } 536 return ptr; 537 } 538 539 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc, 540 WORD *constantinfo_reserved) 541 { 542 const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc); 543 544 if (!constant) 545 { 546 FIXME("Could not get constant desc.\n"); 547 if (constantinfo_reserved) 548 *constantinfo_reserved = 0; 549 return D3DERR_INVALIDCALL; 550 } 551 *desc = constant->desc; 552 if (constantinfo_reserved) 553 *constantinfo_reserved = constant->constantinfo_reserved; 554 return D3D_OK; 555 } 556 557 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set, 558 struct const_upload_info *info) 559 { 560 struct d3dx_parameter *param = const_set->param; 561 unsigned int table = const_set->table; 562 563 info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS) 564 || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS); 565 if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS) 566 { 567 info->major = param->columns; 568 info->minor = param->rows; 569 } 570 else 571 { 572 info->major = param->rows; 573 info->minor = param->columns; 574 } 575 576 if (get_reg_components(table) == 1) 577 { 578 unsigned int const_length = get_offset_reg(table, const_set->register_count); 579 580 info->major_stride = info->minor; 581 info->major_count = const_length / info->major_stride; 582 info->minor_remainder = const_length % info->major_stride; 583 } 584 else 585 { 586 info->major_stride = get_reg_components(table); 587 info->major_count = const_set->register_count; 588 info->minor_remainder = 0; 589 } 590 info->count = info->major_count * info->minor + info->minor_remainder; 591 } 592 593 #define INITIAL_CONST_SET_SIZE 16 594 595 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set) 596 { 597 if (const_tab->const_set_count >= const_tab->const_set_size) 598 { 599 unsigned int new_size; 600 struct d3dx_const_param_eval_output *new_alloc; 601 602 if (!const_tab->const_set_size) 603 { 604 new_size = INITIAL_CONST_SET_SIZE; 605 new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size); 606 if (!new_alloc) 607 { 608 ERR("Out of memory.\n"); 609 return E_OUTOFMEMORY; 610 } 611 } 612 else 613 { 614 new_size = const_tab->const_set_size * 2; 615 new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set, 616 sizeof(*const_tab->const_set) * new_size); 617 if (!new_alloc) 618 { 619 ERR("Out of memory.\n"); 620 return E_OUTOFMEMORY; 621 } 622 } 623 const_tab->const_set = new_alloc; 624 const_tab->const_set_size = new_size; 625 } 626 const_tab->const_set[const_tab->const_set_count++] = *set; 627 return D3D_OK; 628 } 629 630 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab, 631 struct d3dx_preshader *pres) 632 { 633 unsigned int i; 634 struct d3dx_const_param_eval_output const_set = {NULL}; 635 636 for (i = 0; i < pres->ins_count; ++i) 637 { 638 const struct d3dx_pres_ins *ins = &pres->ins[i]; 639 const struct d3dx_pres_reg *reg = &ins->output.reg; 640 641 if (reg->table == PRES_REGTAB_TEMP) 642 continue; 643 644 const_set.register_index = get_reg_offset(reg->table, reg->offset); 645 const_set.register_count = 1; 646 const_set.table = reg->table; 647 const_set.constant_class = D3DXPC_FORCE_DWORD; 648 const_set.element_count = 1; 649 append_const_set(const_tab, &const_set); 650 } 651 } 652 653 static int __cdecl compare_const_set(const void *a, const void *b) 654 { 655 const struct d3dx_const_param_eval_output *r1 = a; 656 const struct d3dx_const_param_eval_output *r2 = b; 657 658 if (r1->table != r2->table) 659 return r1->table - r2->table; 660 return r1->register_index - r2->register_index; 661 } 662 663 static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab, 664 struct d3dx_parameter *param, unsigned int index) 665 { 666 unsigned int i, start_index = index; 667 DWORD *current_data; 668 enum pres_reg_tables current_table; 669 unsigned int current_start_offset, element_count; 670 struct d3dx_const_param_eval_output *first_const; 671 672 if (!const_tab->const_set_count) 673 return D3D_OK; 674 675 while (index < const_tab->const_set_count - 1) 676 { 677 first_const = &const_tab->const_set[index]; 678 current_data = first_const->param->data; 679 current_table = first_const->table; 680 current_start_offset = get_offset_reg(current_table, first_const->register_index); 681 element_count = 0; 682 for (i = index; i < const_tab->const_set_count; ++i) 683 { 684 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i]; 685 unsigned int count = get_offset_reg(const_set->table, 686 const_set->register_count * const_set->element_count); 687 unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index); 688 689 if (!(const_set->table == current_table && current_start_offset == start_offset 690 && const_set->direct_copy == first_const->direct_copy 691 && current_data == const_set->param->data 692 && (const_set->direct_copy || (first_const->param->type == const_set->param->type 693 && first_const->param->class == const_set->param->class 694 && first_const->param->columns == const_set->param->columns 695 && first_const->param->rows == const_set->param->rows 696 && first_const->register_count == const_set->register_count 697 && (i == const_tab->const_set_count - 1 698 || first_const->param->element_count == const_set->param->element_count))))) 699 break; 700 701 current_start_offset += count; 702 current_data += const_set->direct_copy ? count : const_set->param->rows 703 * const_set->param->columns * const_set->element_count; 704 element_count += const_set->element_count; 705 } 706 707 if (i > index + 1) 708 { 709 TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index, 710 debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy); 711 712 first_const->element_count = element_count; 713 if (first_const->direct_copy) 714 { 715 first_const->element_count = 1; 716 if (index == start_index 717 && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT)) 718 { 719 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 720 return D3DERR_INVALIDCALL; 721 first_const->param = param; 722 } 723 first_const->register_count = get_reg_offset(current_table, current_start_offset) 724 - first_const->register_index; 725 } 726 memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i], 727 sizeof(*const_tab->const_set) * (const_tab->const_set_count - i)); 728 const_tab->const_set_count -= i - index - 1; 729 } 730 else 731 { 732 TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n", 733 const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy); 734 } 735 index = i; 736 } 737 return D3D_OK; 738 } 739 740 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab, 741 D3DXHANDLE hc, struct d3dx_parameter *param) 742 { 743 D3DXCONSTANT_DESC desc; 744 unsigned int const_count, param_count, i; 745 BOOL get_element; 746 struct d3dx_const_param_eval_output const_set; 747 struct const_upload_info info; 748 enum pres_value_type table_type; 749 HRESULT hr; 750 751 if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL))) 752 return D3DERR_INVALIDCALL; 753 754 if (param->element_count) 755 { 756 param_count = param->element_count; 757 const_count = desc.Elements; 758 get_element = TRUE; 759 } 760 else 761 { 762 if (desc.Elements > 1) 763 { 764 FIXME("Unexpected number of constant elements %u.\n", desc.Elements); 765 return D3DERR_INVALIDCALL; 766 } 767 param_count = param->member_count; 768 const_count = desc.StructMembers; 769 get_element = FALSE; 770 } 771 if (const_count != param_count) 772 { 773 FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n", 774 param_count, const_count); 775 return D3DERR_INVALIDCALL; 776 } 777 if (const_count) 778 { 779 HRESULT ret = D3D_OK; 780 D3DXHANDLE hc_element; 781 unsigned int index = const_tab->const_set_count; 782 783 for (i = 0; i < const_count; ++i) 784 { 785 if (get_element) 786 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i); 787 else 788 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i); 789 if (!hc_element) 790 { 791 FIXME("Could not get constant.\n"); 792 hr = D3DERR_INVALIDCALL; 793 } 794 else 795 { 796 hr = init_set_constants_param(const_tab, ctab, hc_element, ¶m->members[i]); 797 } 798 if (FAILED(hr)) 799 ret = hr; 800 } 801 if (FAILED(ret)) 802 return ret; 803 return merge_const_set_entries(const_tab, param, index); 804 } 805 806 TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n", 807 debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes); 808 TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n", 809 debugstr_a(param->name), param->rows, param->columns, param->class, 810 param->flags, param->bytes); 811 812 const_set.element_count = 1; 813 const_set.param = param; 814 const_set.constant_class = desc.Class; 815 if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table)) 816 { 817 FIXME("Unknown register set %u.\n", desc.RegisterSet); 818 return D3DERR_INVALIDCALL; 819 } 820 const_set.register_index = desc.RegisterIndex; 821 const_set.table = const_tab->regset2table[desc.RegisterSet]; 822 if (const_set.table >= PRES_REGTAB_COUNT) 823 { 824 ERR("Unexpected register set %u.\n", desc.RegisterSet); 825 return D3DERR_INVALIDCALL; 826 } 827 assert(table_info[const_set.table].component_size == sizeof(unsigned int)); 828 assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int)); 829 const_set.register_count = desc.RegisterCount; 830 table_type = table_info[const_set.table].type; 831 get_const_upload_info(&const_set, &info); 832 if (!info.count) 833 { 834 TRACE("%s has zero count, skipping.\n", debugstr_a(param->name)); 835 return D3D_OK; 836 } 837 838 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 839 return D3DERR_INVALIDCALL; 840 841 const_set.direct_copy = table_type_from_param_type(param->type) == table_type 842 && !info.transpose && info.minor == info.major_stride 843 && info.count == get_offset_reg(const_set.table, const_set.register_count) 844 && info.count * sizeof(unsigned int) <= param->bytes; 845 if (info.minor_remainder && !const_set.direct_copy && !info.transpose) 846 FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n", 847 debugstr_a(param->name)); 848 849 if (info.major_count > info.major 850 || (info.major_count == info.major && info.minor_remainder)) 851 { 852 WARN("Constant dimensions exceed parameter size.\n"); 853 return D3DERR_INVALIDCALL; 854 } 855 856 if (FAILED(hr = append_const_set(const_tab, &const_set))) 857 return hr; 858 859 return D3D_OK; 860 } 861 862 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out, 863 struct d3dx_effect *effect, const char **skip_constants, 864 unsigned int skip_constants_count, struct d3dx_preshader *pres) 865 { 866 ID3DXConstantTable *ctab; 867 D3DXCONSTANT_DESC *cdesc; 868 struct d3dx_parameter **inputs_param; 869 D3DXCONSTANTTABLE_DESC desc; 870 HRESULT hr; 871 D3DXHANDLE hc; 872 unsigned int i, j; 873 874 hr = D3DXGetShaderConstantTable(byte_code, &ctab); 875 if (FAILED(hr) || !ctab) 876 { 877 TRACE("Could not get CTAB data, hr %#x.\n", hr); 878 /* returning OK, shaders and preshaders without CTAB are valid */ 879 return D3D_OK; 880 } 881 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc))) 882 { 883 FIXME("Could not get CTAB desc, hr %#x.\n", hr); 884 goto cleanup; 885 } 886 887 out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants); 888 out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants); 889 if (!cdesc || !inputs_param) 890 { 891 hr = E_OUTOFMEMORY; 892 goto cleanup; 893 } 894 895 for (i = 0; i < desc.Constants; ++i) 896 { 897 unsigned int index = out->input_count; 898 WORD constantinfo_reserved; 899 900 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i); 901 if (!hc) 902 { 903 FIXME("Null constant handle.\n"); 904 goto cleanup; 905 } 906 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved))) 907 goto cleanup; 908 inputs_param[index] = get_parameter_by_name(effect, NULL, cdesc[index].Name); 909 if (!inputs_param[index]) 910 { 911 WARN("Could not find parameter %s in effect.\n", cdesc[index].Name); 912 continue; 913 } 914 if (cdesc[index].Class == D3DXPC_OBJECT) 915 { 916 TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]); 917 if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT 918 || !is_param_type_sampler(inputs_param[index]->type)) 919 { 920 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name)); 921 hr = D3DERR_INVALIDCALL; 922 goto cleanup; 923 } 924 if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount) 925 { 926 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name)); 927 hr = D3DERR_INVALIDCALL; 928 goto cleanup; 929 } 930 } 931 if (!is_top_level_parameter(inputs_param[index])) 932 { 933 WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name)); 934 hr = E_FAIL; 935 goto cleanup; 936 } 937 938 for (j = 0; j < skip_constants_count; ++j) 939 { 940 if (!strcmp(cdesc[index].Name, skip_constants[j])) 941 { 942 if (!constantinfo_reserved) 943 { 944 WARN("skip_constants parameter %s is not register bound.\n", 945 cdesc[index].Name); 946 hr = D3DERR_INVALIDCALL; 947 goto cleanup; 948 } 949 TRACE("Skipping constant %s.\n", cdesc[index].Name); 950 break; 951 } 952 } 953 if (j < skip_constants_count) 954 continue; 955 ++out->input_count; 956 if (inputs_param[index]->class == D3DXPC_OBJECT) 957 continue; 958 if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index]))) 959 goto cleanup; 960 } 961 if (pres) 962 append_pres_const_sets_for_shader_input(out, pres); 963 if (out->const_set_count) 964 { 965 struct d3dx_const_param_eval_output *new_alloc; 966 967 qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set); 968 969 i = 0; 970 while (i < out->const_set_count - 1) 971 { 972 if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD 973 && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD 974 && out->const_set[i].table == out->const_set[i + 1].table 975 && out->const_set[i].register_index + out->const_set[i].register_count 976 >= out->const_set[i + 1].register_index) 977 { 978 assert(out->const_set[i].register_index + out->const_set[i].register_count 979 <= out->const_set[i + 1].register_index + 1); 980 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1 981 - out->const_set[i].register_index; 982 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i]) 983 * (out->const_set_count - i - 2)); 984 --out->const_set_count; 985 } 986 else 987 { 988 ++i; 989 } 990 } 991 992 new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set, 993 sizeof(*out->const_set) * out->const_set_count); 994 if (new_alloc) 995 { 996 out->const_set = new_alloc; 997 out->const_set_size = out->const_set_count; 998 } 999 else 1000 { 1001 WARN("Out of memory.\n"); 1002 } 1003 } 1004 cleanup: 1005 ID3DXConstantTable_Release(ctab); 1006 return hr; 1007 } 1008 1009 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register) 1010 { 1011 if (table < PRES_REGTAB_COUNT) 1012 table_sizes[table] = max(table_sizes[table], max_register + 1); 1013 } 1014 1015 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab) 1016 { 1017 unsigned int i, table, max_register; 1018 1019 for (i = 0; i < ctab->input_count; ++i) 1020 { 1021 if (!ctab->inputs[i].RegisterCount) 1022 continue; 1023 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1; 1024 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1025 update_table_size(table_sizes, table, max_register); 1026 } 1027 } 1028 1029 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count) 1030 { 1031 static const char *xyzw_str = "xyzw"; 1032 unsigned int i, table; 1033 1034 table = arg->reg.table; 1035 if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT) 1036 { 1037 TRACE("("); 1038 for (i = 0; i < component_count; ++i) 1039 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e", 1040 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]); 1041 TRACE(")"); 1042 } 1043 else 1044 { 1045 if (arg->index_reg.table == PRES_REGTAB_COUNT) 1046 { 1047 TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset)); 1048 } 1049 else 1050 { 1051 unsigned int index_reg; 1052 1053 index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset); 1054 TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset), 1055 table_symbol[arg->index_reg.table], index_reg, 1056 xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]); 1057 } 1058 for (i = 0; i < component_count; ++i) 1059 TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]); 1060 } 1061 } 1062 1063 static void dump_registers(struct d3dx_const_tab *ctab) 1064 { 1065 unsigned int table, i; 1066 1067 for (i = 0; i < ctab->input_count; ++i) 1068 { 1069 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1070 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)", 1071 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount); 1072 } 1073 } 1074 1075 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins) 1076 { 1077 unsigned int i; 1078 1079 TRACE("%s ", pres_op_info[ins->op].mnem); 1080 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count); 1081 for (i = 0; i < pres_op_info[ins->op].input_count; ++i) 1082 { 1083 TRACE(", "); 1084 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count); 1085 } 1086 TRACE("\n"); 1087 } 1088 1089 static void dump_preshader(struct d3dx_preshader *pres) 1090 { 1091 unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4; 1092 const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED]; 1093 1094 if (immediate_count) 1095 TRACE("// Immediates:\n"); 1096 for (i = 0; i < immediate_count; ++i) 1097 { 1098 if (!(i % 4)) 1099 TRACE("// "); 1100 TRACE("%.8e", immediates[i]); 1101 if (i % 4 == 3) 1102 TRACE("\n"); 1103 else 1104 TRACE(", "); 1105 } 1106 TRACE("// Preshader registers:\n"); 1107 dump_registers(&pres->inputs); 1108 TRACE("preshader\n"); 1109 for (i = 0; i < pres->ins_count; ++i) 1110 dump_ins(&pres->regs, &pres->ins[i]); 1111 } 1112 1113 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx_effect *effect) 1114 { 1115 unsigned int *p; 1116 unsigned int i, j, const_count; 1117 double *dconst; 1118 HRESULT hr; 1119 unsigned int saved_word; 1120 unsigned int section_size; 1121 1122 TRACE("Preshader version %#x.\n", *ptr & 0xffff); 1123 1124 if (!count) 1125 { 1126 WARN("Unexpected end of byte code buffer.\n"); 1127 return D3DXERR_INVALIDDATA; 1128 } 1129 1130 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, §ion_size); 1131 if (p) 1132 { 1133 const_count = *p++; 1134 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int))) 1135 { 1136 WARN("Byte code buffer ends unexpectedly.\n"); 1137 return D3DXERR_INVALIDDATA; 1138 } 1139 dconst = (double *)p; 1140 } 1141 else 1142 { 1143 const_count = 0; 1144 dconst = NULL; 1145 } 1146 TRACE("%u double constants.\n", const_count); 1147 1148 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, §ion_size); 1149 if (!p) 1150 { 1151 WARN("Could not find preshader code.\n"); 1152 return D3D_OK; 1153 } 1154 pres->ins_count = *p++; 1155 --section_size; 1156 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins)) 1157 { 1158 WARN("Invalid instruction count %u.\n", pres->ins_count); 1159 return D3DXERR_INVALIDDATA; 1160 } 1161 TRACE("%u instructions.\n", pres->ins_count); 1162 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count); 1163 if (!pres->ins) 1164 return E_OUTOFMEMORY; 1165 for (i = 0; i < pres->ins_count; ++i) 1166 { 1167 unsigned int *ptr_next; 1168 1169 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]); 1170 if (!ptr_next) 1171 return D3DXERR_INVALIDDATA; 1172 section_size -= ptr_next - p; 1173 p = ptr_next; 1174 } 1175 1176 pres->inputs.regset2table = pres_regset2table; 1177 1178 saved_word = *ptr; 1179 *ptr = 0xfffe0000; 1180 hr = get_constants_desc(ptr, &pres->inputs, effect, NULL, 0, NULL); 1181 *ptr = saved_word; 1182 if (FAILED(hr)) 1183 return hr; 1184 1185 if (const_count % get_reg_components(PRES_REGTAB_IMMED)) 1186 { 1187 FIXME("const_count %u is not a multiple of %u.\n", const_count, 1188 get_reg_components(PRES_REGTAB_IMMED)); 1189 return D3DXERR_INVALIDDATA; 1190 } 1191 pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count); 1192 1193 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs); 1194 for (i = 0; i < pres->ins_count; ++i) 1195 { 1196 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j) 1197 { 1198 enum pres_reg_tables table; 1199 unsigned int reg_idx; 1200 1201 if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT) 1202 { 1203 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0 1204 : pres->ins[i].component_count - 1; 1205 1206 table = pres->ins[i].inputs[j].reg.table; 1207 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset 1208 + last_component_index); 1209 } 1210 else 1211 { 1212 table = pres->ins[i].inputs[j].index_reg.table; 1213 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset); 1214 } 1215 if (reg_idx >= pres->regs.table_sizes[table]) 1216 { 1217 /* Native accepts these broken preshaders. */ 1218 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n", 1219 i, j, table, reg_idx); 1220 return D3DXERR_INVALIDDATA; 1221 } 1222 } 1223 update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table, 1224 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset)); 1225 } 1226 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED))) 1227 return E_OUTOFMEMORY; 1228 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count); 1229 1230 return D3D_OK; 1231 } 1232 1233 HRESULT d3dx_create_param_eval(struct d3dx_effect *effect, void *byte_code, unsigned int byte_code_size, 1234 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter, 1235 const char **skip_constants, unsigned int skip_constants_count) 1236 { 1237 struct d3dx_param_eval *peval; 1238 unsigned int *ptr, *shader_ptr = NULL; 1239 unsigned int i; 1240 BOOL shader; 1241 unsigned int count, pres_size; 1242 HRESULT ret; 1243 1244 TRACE("effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n", 1245 effect, byte_code, byte_code_size, type, peval_out); 1246 1247 count = byte_code_size / sizeof(unsigned int); 1248 if (!byte_code || !count) 1249 { 1250 *peval_out = NULL; 1251 return D3D_OK; 1252 } 1253 1254 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval)); 1255 if (!peval) 1256 { 1257 ret = E_OUTOFMEMORY; 1258 goto err_out; 1259 } 1260 peval->version_counter = version_counter; 1261 1262 peval->param_type = type; 1263 switch (type) 1264 { 1265 case D3DXPT_VERTEXSHADER: 1266 case D3DXPT_PIXELSHADER: 1267 shader = TRUE; 1268 break; 1269 default: 1270 shader = FALSE; 1271 break; 1272 } 1273 peval->shader_inputs.regset2table = shad_regset2table; 1274 1275 ptr = (unsigned int *)byte_code; 1276 if (shader) 1277 { 1278 if ((*ptr & 0xfffe0000) != 0xfffe0000) 1279 { 1280 FIXME("Invalid shader signature %#x.\n", *ptr); 1281 ret = D3DXERR_INVALIDDATA; 1282 goto err_out; 1283 } 1284 TRACE("Shader version %#x.\n", *ptr & 0xffff); 1285 shader_ptr = ptr; 1286 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size); 1287 if (!ptr) 1288 TRACE("No preshader found.\n"); 1289 } 1290 else 1291 { 1292 pres_size = count; 1293 } 1294 1295 if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, effect))) 1296 { 1297 FIXME("Failed parsing preshader, byte code for analysis follows.\n"); 1298 dump_bytecode(byte_code, byte_code_size); 1299 goto err_out; 1300 } 1301 1302 if (shader) 1303 { 1304 if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, effect, 1305 skip_constants, skip_constants_count, &peval->pres))) 1306 { 1307 TRACE("Could not get shader constant table, hr %#x.\n", ret); 1308 goto err_out; 1309 } 1310 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs); 1311 } 1312 1313 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i) 1314 { 1315 if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i))) 1316 goto err_out; 1317 } 1318 1319 if (TRACE_ON(d3dx)) 1320 { 1321 dump_bytecode(byte_code, byte_code_size); 1322 dump_preshader(&peval->pres); 1323 if (shader) 1324 { 1325 TRACE("// Shader registers:\n"); 1326 dump_registers(&peval->shader_inputs); 1327 } 1328 } 1329 *peval_out = peval; 1330 TRACE("Created parameter evaluator %p.\n", *peval_out); 1331 return D3D_OK; 1332 1333 err_out: 1334 WARN("Error creating parameter evaluator.\n"); 1335 if (TRACE_ON(d3dx)) 1336 dump_bytecode(byte_code, byte_code_size); 1337 1338 d3dx_free_param_eval(peval); 1339 *peval_out = NULL; 1340 return ret; 1341 } 1342 1343 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab) 1344 { 1345 HeapFree(GetProcessHeap(), 0, ctab->inputs); 1346 HeapFree(GetProcessHeap(), 0, ctab->inputs_param); 1347 HeapFree(GetProcessHeap(), 0, ctab->const_set); 1348 } 1349 1350 static void d3dx_free_preshader(struct d3dx_preshader *pres) 1351 { 1352 HeapFree(GetProcessHeap(), 0, pres->ins); 1353 1354 regstore_free_tables(&pres->regs); 1355 d3dx_free_const_tab(&pres->inputs); 1356 } 1357 1358 void d3dx_free_param_eval(struct d3dx_param_eval *peval) 1359 { 1360 TRACE("peval %p.\n", peval); 1361 1362 if (!peval) 1363 return; 1364 1365 d3dx_free_preshader(&peval->pres); 1366 d3dx_free_const_tab(&peval->shader_inputs); 1367 HeapFree(GetProcessHeap(), 0, peval); 1368 } 1369 1370 static void pres_int_from_float(void *out, const void *in, unsigned int count) 1371 { 1372 unsigned int i; 1373 const float *in_float = in; 1374 int *out_int = out; 1375 1376 for (i = 0; i < count; ++i) 1377 out_int[i] = in_float[i]; 1378 } 1379 1380 static void pres_bool_from_value(void *out, const void *in, unsigned int count) 1381 { 1382 unsigned int i; 1383 const DWORD *in_dword = in; 1384 BOOL *out_bool = out; 1385 1386 for (i = 0; i < count; ++i) 1387 out_bool[i] = !!in_dword[i]; 1388 } 1389 1390 static void pres_float_from_int(void *out, const void *in, unsigned int count) 1391 { 1392 unsigned int i; 1393 const int *in_int = in; 1394 float *out_float = out; 1395 1396 for (i = 0; i < count; ++i) 1397 out_float[i] = in_int[i]; 1398 } 1399 1400 static void pres_float_from_bool(void *out, const void *in, unsigned int count) 1401 { 1402 unsigned int i; 1403 const BOOL *in_bool = in; 1404 float *out_float = out; 1405 1406 for (i = 0; i < count; ++i) 1407 out_float[i] = !!in_bool[i]; 1408 } 1409 1410 static void pres_int_from_bool(void *out, const void *in, unsigned int count) 1411 { 1412 unsigned int i; 1413 const float *in_bool = in; 1414 int *out_int = out; 1415 1416 for (i = 0; i < count; ++i) 1417 out_int[i] = !!in_bool[i]; 1418 } 1419 1420 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table, 1421 unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type) 1422 { 1423 typedef void (*conv_func)(void *out, const void *in, unsigned int count); 1424 static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] = 1425 { 1426 {NULL, NULL, pres_int_from_float, pres_bool_from_value}, 1427 {NULL, NULL, NULL, NULL}, 1428 {pres_float_from_int, NULL, NULL, pres_bool_from_value}, 1429 {pres_float_from_bool, NULL, pres_int_from_bool, NULL} 1430 }; 1431 enum pres_value_type table_type = table_info[table].type; 1432 1433 if (param_type == table_type) 1434 { 1435 regstore_set_values(rs, table, in, offset, count); 1436 return; 1437 } 1438 1439 set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count); 1440 } 1441 1442 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1443 D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr, 1444 unsigned int start, unsigned int count) 1445 { 1446 if (type == D3DXPT_VERTEXSHADER) 1447 { 1448 switch(table) 1449 { 1450 case PRES_REGTAB_OCONST: 1451 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count); 1452 case PRES_REGTAB_OICONST: 1453 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count); 1454 case PRES_REGTAB_OBCONST: 1455 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count); 1456 default: 1457 FIXME("Unexpected register table %u.\n", table); 1458 return D3DERR_INVALIDCALL; 1459 } 1460 } 1461 else if (type == D3DXPT_PIXELSHADER) 1462 { 1463 switch(table) 1464 { 1465 case PRES_REGTAB_OCONST: 1466 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count); 1467 case PRES_REGTAB_OICONST: 1468 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count); 1469 case PRES_REGTAB_OBCONST: 1470 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count); 1471 default: 1472 FIXME("Unexpected register table %u.\n", table); 1473 return D3DERR_INVALIDCALL; 1474 } 1475 } 1476 else 1477 { 1478 FIXME("Unexpected parameter type %u.\n", type); 1479 return D3DERR_INVALIDCALL; 1480 } 1481 } 1482 1483 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab, 1484 ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1485 D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty) 1486 { 1487 unsigned int const_idx; 1488 unsigned int current_start = 0, current_count = 0; 1489 enum pres_reg_tables current_table = PRES_REGTAB_COUNT; 1490 BOOL update_device = manager || device; 1491 HRESULT hr, result = D3D_OK; 1492 ULONG64 update_version = const_tab->update_version; 1493 1494 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1495 { 1496 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1497 enum pres_reg_tables table = const_set->table; 1498 struct d3dx_parameter *param = const_set->param; 1499 unsigned int element, i, j, start_offset; 1500 struct const_upload_info info; 1501 unsigned int *data; 1502 enum pres_value_type param_type; 1503 1504 if (!(param && is_param_dirty(param, update_version))) 1505 continue; 1506 1507 data = param->data; 1508 start_offset = get_offset_reg(table, const_set->register_index); 1509 if (const_set->direct_copy) 1510 { 1511 regstore_set_values(rs, table, data, start_offset, 1512 get_offset_reg(table, const_set->register_count)); 1513 continue; 1514 } 1515 param_type = table_type_from_param_type(param->type); 1516 if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR) 1517 { 1518 unsigned int count = max(param->rows, param->columns); 1519 1520 if (count >= get_reg_components(table)) 1521 { 1522 regstore_set_data(rs, table, start_offset, data, 1523 count * const_set->element_count, param_type); 1524 } 1525 else 1526 { 1527 for (element = 0; element < const_set->element_count; ++element) 1528 regstore_set_data(rs, table, start_offset + get_offset_reg(table, element), 1529 &data[element * count], count, param_type); 1530 } 1531 continue; 1532 } 1533 get_const_upload_info(const_set, &info); 1534 for (element = 0; element < const_set->element_count; ++element) 1535 { 1536 unsigned int *out = (unsigned int *)rs->tables[table] + start_offset; 1537 1538 /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer. 1539 * All the supported types of parameters and table values have the same size. */ 1540 if (info.transpose) 1541 { 1542 for (i = 0; i < info.major_count; ++i) 1543 for (j = 0; j < info.minor; ++j) 1544 out[i * info.major_stride + j] = data[i + j * info.major]; 1545 1546 for (j = 0; j < info.minor_remainder; ++j) 1547 out[i * info.major_stride + j] = data[i + j * info.major]; 1548 } 1549 else 1550 { 1551 for (i = 0; i < info.major_count; ++i) 1552 for (j = 0; j < info.minor; ++j) 1553 out[i * info.major_stride + j] = data[i * info.minor + j]; 1554 } 1555 start_offset += get_offset_reg(table, const_set->register_count); 1556 data += param->rows * param->columns; 1557 } 1558 start_offset = get_offset_reg(table, const_set->register_index); 1559 if (table_info[table].type != param_type) 1560 regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset, 1561 get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type); 1562 } 1563 const_tab->update_version = new_update_version; 1564 if (!update_device) 1565 return D3D_OK; 1566 1567 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1568 { 1569 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1570 1571 if (device_update_all || (const_set->param 1572 ? is_param_dirty(const_set->param, update_version) : pres_dirty)) 1573 { 1574 enum pres_reg_tables table = const_set->table; 1575 1576 if (table == current_table && current_start + current_count == const_set->register_index) 1577 { 1578 current_count += const_set->register_count * const_set->element_count; 1579 } 1580 else 1581 { 1582 if (current_count) 1583 { 1584 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1585 (DWORD *)rs->tables[current_table] 1586 + get_offset_reg(current_table, current_start), current_start, current_count))) 1587 result = hr; 1588 } 1589 current_table = table; 1590 current_start = const_set->register_index; 1591 current_count = const_set->register_count * const_set->element_count; 1592 } 1593 } 1594 } 1595 if (current_count) 1596 { 1597 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1598 (DWORD *)rs->tables[current_table] 1599 + get_offset_reg(current_table, current_start), current_start, current_count))) 1600 result = hr; 1601 } 1602 return result; 1603 } 1604 1605 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset) 1606 { 1607 return regstore_get_double(rs, table, offset); 1608 } 1609 1610 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp) 1611 { 1612 unsigned int offset, base_index, reg_index, table; 1613 1614 table = opr->reg.table; 1615 1616 if (opr->index_reg.table == PRES_REGTAB_COUNT) 1617 base_index = 0; 1618 else 1619 base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset)); 1620 1621 offset = get_offset_reg(table, base_index) + opr->reg.offset + comp; 1622 reg_index = get_reg_offset(table, offset); 1623 1624 if (reg_index >= rs->table_sizes[table]) 1625 { 1626 unsigned int wrap_size; 1627 1628 if (table == PRES_REGTAB_CONST) 1629 { 1630 /* As it can be guessed from tests, offset into floating constant table is wrapped 1631 * to the nearest power of 2 and not to the actual table size. */ 1632 for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1) 1633 ; 1634 } 1635 else 1636 { 1637 wrap_size = rs->table_sizes[table]; 1638 } 1639 WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n", 1640 reg_index, table, wrap_size, rs->table_sizes[table]); 1641 reg_index %= wrap_size; 1642 1643 if (reg_index >= rs->table_sizes[table]) 1644 return 0.0; 1645 1646 offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table); 1647 } 1648 1649 return exec_get_reg_value(rs, table, offset); 1650 } 1651 1652 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg, 1653 unsigned int comp, double res) 1654 { 1655 regstore_set_double(rs, reg->table, reg->offset + comp, res); 1656 } 1657 1658 #define ARGS_ARRAY_SIZE 8 1659 static HRESULT execute_preshader(struct d3dx_preshader *pres) 1660 { 1661 unsigned int i, j, k; 1662 double args[ARGS_ARRAY_SIZE]; 1663 double res; 1664 1665 for (i = 0; i < pres->ins_count; ++i) 1666 { 1667 const struct d3dx_pres_ins *ins; 1668 const struct op_info *oi; 1669 1670 ins = &pres->ins[i]; 1671 oi = &pres_op_info[ins->op]; 1672 if (oi->func_all_comps) 1673 { 1674 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE) 1675 { 1676 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count); 1677 return E_FAIL; 1678 } 1679 for (k = 0; k < oi->input_count; ++k) 1680 for (j = 0; j < ins->component_count; ++j) 1681 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k], 1682 ins->scalar_op && !k ? 0 : j); 1683 res = oi->func(args, ins->component_count); 1684 1685 /* only 'dot' instruction currently falls here */ 1686 exec_set_arg(&pres->regs, &ins->output.reg, 0, res); 1687 } 1688 else 1689 { 1690 for (j = 0; j < ins->component_count; ++j) 1691 { 1692 for (k = 0; k < oi->input_count; ++k) 1693 args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j); 1694 res = oi->func(args, ins->component_count); 1695 exec_set_arg(&pres->regs, &ins->output.reg, j, res); 1696 } 1697 } 1698 } 1699 return D3D_OK; 1700 } 1701 1702 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version) 1703 { 1704 unsigned int i; 1705 1706 if (update_version == ULONG64_MAX) 1707 update_version = ctab->update_version; 1708 for (i = 0; i < ctab->input_count; ++i) 1709 { 1710 if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]), 1711 update_version)) 1712 return TRUE; 1713 } 1714 return FALSE; 1715 } 1716 1717 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version) 1718 { 1719 return is_const_tab_input_dirty(&peval->pres.inputs, update_version) 1720 || is_const_tab_input_dirty(&peval->shader_inputs, update_version); 1721 } 1722 1723 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param, 1724 void *param_value) 1725 { 1726 HRESULT hr; 1727 unsigned int i; 1728 unsigned int elements, elements_param, elements_table; 1729 float *oc; 1730 1731 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value); 1732 1733 if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX)) 1734 { 1735 set_constants(&peval->pres.regs, &peval->pres.inputs, 1736 next_update_version(peval->version_counter), 1737 NULL, NULL, peval->param_type, FALSE, FALSE); 1738 1739 if (FAILED(hr = execute_preshader(&peval->pres))) 1740 return hr; 1741 } 1742 1743 elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]); 1744 elements_param = param->bytes / sizeof(unsigned int); 1745 elements = min(elements_table, elements_param); 1746 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST]; 1747 for (i = 0; i < elements; ++i) 1748 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT); 1749 return D3D_OK; 1750 } 1751 1752 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1753 struct d3dx_param_eval *peval, BOOL update_all) 1754 { 1755 HRESULT hr; 1756 struct d3dx_preshader *pres = &peval->pres; 1757 struct d3dx_regstore *rs = &pres->regs; 1758 ULONG64 new_update_version = next_update_version(peval->version_counter); 1759 BOOL pres_dirty = FALSE; 1760 1761 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type); 1762 1763 if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX)) 1764 { 1765 set_constants(rs, &pres->inputs, new_update_version, 1766 NULL, NULL, peval->param_type, FALSE, FALSE); 1767 if (FAILED(hr = execute_preshader(pres))) 1768 return hr; 1769 pres_dirty = TRUE; 1770 } 1771 1772 return set_constants(rs, &peval->shader_inputs, new_update_version, 1773 manager, device, peval->param_type, update_all, pres_dirty); 1774 } 1775