1 /* 2 * Copyright 2016 Paul Gofman 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with this library; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 17 */ 18 19 #include "config.h" 20 #include "wine/port.h" 21 22 #include "d3dx9_private.h" 23 24 #include <float.h> 25 #include <assert.h> 26 27 WINE_DEFAULT_DEBUG_CHANNEL(d3dx); 28 29 /* ReactOS FIXME: Insect */ 30 #define fmin min 31 #define fmax max 32 33 enum pres_ops 34 { 35 PRESHADER_OP_NOP, 36 PRESHADER_OP_MOV, 37 PRESHADER_OP_NEG, 38 PRESHADER_OP_RCP, 39 PRESHADER_OP_FRC, 40 PRESHADER_OP_EXP, 41 PRESHADER_OP_LOG, 42 PRESHADER_OP_RSQ, 43 PRESHADER_OP_SIN, 44 PRESHADER_OP_COS, 45 PRESHADER_OP_ASIN, 46 PRESHADER_OP_ACOS, 47 PRESHADER_OP_ATAN, 48 PRESHADER_OP_MIN, 49 PRESHADER_OP_MAX, 50 PRESHADER_OP_LT, 51 PRESHADER_OP_GE, 52 PRESHADER_OP_ADD, 53 PRESHADER_OP_MUL, 54 PRESHADER_OP_ATAN2, 55 PRESHADER_OP_DIV, 56 PRESHADER_OP_CMP, 57 PRESHADER_OP_DOT, 58 PRESHADER_OP_DOTSWIZ6, 59 PRESHADER_OP_DOTSWIZ8, 60 }; 61 62 typedef double (*pres_op_func)(double *args, int n); 63 64 static double to_signed_nan(double v) 65 { 66 static const union 67 { 68 ULONG64 ulong64_value; 69 double double_value; 70 } 71 signed_nan = 72 { 73 0xfff8000000000000 74 }; 75 76 return isnan(v) ? signed_nan.double_value : v; 77 } 78 79 static double pres_mov(double *args, int n) {return args[0];} 80 static double pres_add(double *args, int n) {return args[0] + args[1];} 81 static double pres_mul(double *args, int n) {return args[0] * args[1];} 82 static double pres_dot(double *args, int n) 83 { 84 int i; 85 double sum; 86 87 sum = 0.0; 88 for (i = 0; i < n; ++i) 89 sum += args[i] * args[i + n]; 90 return sum; 91 } 92 93 static double pres_dotswiz6(double *args, int n) 94 { 95 return pres_dot(args, 3); 96 } 97 98 static double pres_dotswiz8(double *args, int n) 99 { 100 return pres_dot(args, 4); 101 } 102 103 static double pres_neg(double *args, int n) {return -args[0];} 104 static double pres_rcp(double *args, int n) {return 1.0 / args[0];} 105 static double pres_lt(double *args, int n) {return args[0] < args[1] ? 1.0 : 0.0;} 106 static double pres_ge(double *args, int n) {return args[0] >= args[1] ? 1.0 : 0.0;} 107 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);} 108 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);} 109 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);} 110 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];} 111 static double pres_sin(double *args, int n) {return sin(args[0]);} 112 static double pres_cos(double *args, int n) {return cos(args[0]);} 113 static double pres_rsq(double *args, int n) 114 { 115 double v; 116 117 v = fabs(args[0]); 118 if (v == 0.0) 119 return INFINITY; 120 else 121 return 1.0 / sqrt(v); 122 } 123 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);} 124 static double pres_log(double *args, int n) 125 { 126 double v; 127 128 v = fabs(args[0]); 129 if (v == 0.0) 130 return 0.0; 131 else 132 #ifdef HAVE_LOG2 133 return log2(v); 134 #else 135 return log(v) / log(2); 136 #endif 137 } 138 static double pres_asin(double *args, int n) {return to_signed_nan(asin(args[0]));} 139 static double pres_acos(double *args, int n) {return to_signed_nan(acos(args[0]));} 140 static double pres_atan(double *args, int n) {return atan(args[0]);} 141 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);} 142 143 /* According to the test results 'div' operation always returns 0. Compiler does not seem to ever 144 * generate it, using rcp + mul instead, so probably it is not implemented in native d3dx. */ 145 static double pres_div(double *args, int n) {return 0.0;} 146 147 #define PRES_OPCODE_MASK 0x7ff00000 148 #define PRES_OPCODE_SHIFT 20 149 #define PRES_SCALAR_FLAG 0x80000000 150 #define PRES_NCOMP_MASK 0x0000ffff 151 152 #define FOURCC_PRES 0x53455250 153 #define FOURCC_CLIT 0x54494c43 154 #define FOURCC_FXLC 0x434c5846 155 #define FOURCC_PRSI 0x49535250 156 #define PRES_SIGN 0x46580000 157 158 struct op_info 159 { 160 unsigned int opcode; 161 char mnem[16]; 162 unsigned int input_count; 163 BOOL func_all_comps; 164 pres_op_func func; 165 }; 166 167 static const struct op_info pres_op_info[] = 168 { 169 {0x000, "nop", 0, 0, NULL }, /* PRESHADER_OP_NOP */ 170 {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */ 171 {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */ 172 {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */ 173 {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */ 174 {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */ 175 {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */ 176 {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */ 177 {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */ 178 {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */ 179 {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */ 180 {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */ 181 {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */ 182 {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */ 183 {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */ 184 {0x202, "lt", 2, 0, pres_lt }, /* PRESHADER_OP_LT */ 185 {0x203, "ge", 2, 0, pres_ge }, /* PRESHADER_OP_GE */ 186 {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */ 187 {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */ 188 {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */ 189 {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */ 190 {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */ 191 {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */ 192 {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */ 193 {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */ 194 }; 195 196 enum pres_value_type 197 { 198 PRES_VT_FLOAT, 199 PRES_VT_DOUBLE, 200 PRES_VT_INT, 201 PRES_VT_BOOL, 202 PRES_VT_COUNT 203 }; 204 205 static const struct 206 { 207 unsigned int component_size; 208 enum pres_value_type type; 209 } 210 table_info[] = 211 { 212 {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */ 213 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */ 214 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */ 215 {sizeof(BOOL), PRES_VT_BOOL }, /* PRES_REGTAB_OBCONST */ 216 {sizeof(int), PRES_VT_INT, }, /* PRES_REGTAB_OICONST */ 217 /* TODO: use double precision for 64 bit */ 218 {sizeof(float), PRES_VT_FLOAT } /* PRES_REGTAB_TEMP */ 219 }; 220 221 static const char *table_symbol[] = 222 { 223 "imm", "c", "oc", "ob", "oi", "r", "(null)", 224 }; 225 226 static const enum pres_reg_tables pres_regset2table[] = 227 { 228 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 229 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 230 PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */ 231 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 232 }; 233 234 static const enum pres_reg_tables shad_regset2table[] = 235 { 236 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 237 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 238 PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */ 239 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 240 }; 241 242 struct d3dx_pres_reg 243 { 244 enum pres_reg_tables table; 245 /* offset is component index, not register index, e. g. 246 offset for component c3.y is 13 (3 * 4 + 1) */ 247 unsigned int offset; 248 }; 249 250 struct d3dx_pres_operand 251 { 252 struct d3dx_pres_reg reg; 253 struct d3dx_pres_reg index_reg; 254 }; 255 256 #define MAX_INPUTS_COUNT 8 257 258 struct d3dx_pres_ins 259 { 260 enum pres_ops op; 261 /* first input argument is scalar, 262 scalar component is propagated */ 263 BOOL scalar_op; 264 unsigned int component_count; 265 struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT]; 266 struct d3dx_pres_operand output; 267 }; 268 269 struct const_upload_info 270 { 271 BOOL transpose; 272 unsigned int major, minor; 273 unsigned int major_stride; 274 unsigned int major_count; 275 unsigned int count; 276 unsigned int minor_remainder; 277 }; 278 279 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type) 280 { 281 switch (type) 282 { 283 case D3DXPT_FLOAT: 284 return PRES_VT_FLOAT; 285 case D3DXPT_INT: 286 return PRES_VT_INT; 287 case D3DXPT_BOOL: 288 return PRES_VT_BOOL; 289 default: 290 FIXME("Unsupported type %u.\n", type); 291 return PRES_VT_COUNT; 292 } 293 } 294 295 static unsigned int get_reg_offset(unsigned int table, unsigned int offset) 296 { 297 return table == PRES_REGTAB_OBCONST ? offset : offset >> 2; 298 } 299 300 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx) 301 { 302 return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2; 303 } 304 305 static unsigned int get_reg_components(unsigned int table) 306 { 307 return get_offset_reg(table, 1); 308 } 309 310 #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8) 311 312 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table) 313 { 314 unsigned int size; 315 316 size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size; 317 if (size) 318 { 319 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size); 320 if (!rs->tables[table]) 321 return E_OUTOFMEMORY; 322 } 323 return D3D_OK; 324 } 325 326 static void regstore_free_tables(struct d3dx_regstore *rs) 327 { 328 unsigned int i; 329 330 for (i = 0; i < PRES_REGTAB_COUNT; ++i) 331 { 332 HeapFree(GetProcessHeap(), 0, rs->tables[i]); 333 } 334 } 335 336 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data, 337 unsigned int start_offset, unsigned int count) 338 { 339 BYTE *dst = rs->tables[table]; 340 const BYTE *src = data; 341 unsigned int size; 342 343 dst += start_offset * table_info[table].component_size; 344 size = count * table_info[table].component_size; 345 assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst)); 346 memcpy(dst, src, size); 347 } 348 349 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset) 350 { 351 BYTE *p; 352 353 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 354 switch (table_info[table].type) 355 { 356 case PRES_VT_FLOAT: 357 return *(float *)p; 358 case PRES_VT_DOUBLE: 359 return *(double *)p; 360 default: 361 FIXME("Unexpected preshader input from table %u.\n", table); 362 return NAN; 363 } 364 } 365 366 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v) 367 { 368 BYTE *p; 369 370 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 371 switch (table_info[table].type) 372 { 373 case PRES_VT_FLOAT : *(float *)p = v; break; 374 case PRES_VT_DOUBLE: *(double *)p = v; break; 375 case PRES_VT_INT : *(int *)p = lrint(v); break; 376 case PRES_VT_BOOL : *(BOOL *)p = !!v; break; 377 default: 378 FIXME("Bad type %u.\n", table_info[table].type); 379 break; 380 } 381 } 382 383 static void dump_bytecode(void *data, unsigned int size) 384 { 385 unsigned int *bytecode = (unsigned int *)data; 386 unsigned int i, j, n; 387 388 size /= sizeof(*bytecode); 389 i = 0; 390 while (i < size) 391 { 392 n = min(size - i, 8); 393 for (j = 0; j < n; ++j) 394 TRACE("0x%08x,", bytecode[i + j]); 395 i += n; 396 TRACE("\n"); 397 } 398 } 399 400 static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count, 401 unsigned int fourcc, unsigned int *size) 402 { 403 /* Provide at least one value in comment section on non-NULL return. */ 404 while (count > 2 && (*ptr & 0xffff) == 0xfffe) 405 { 406 unsigned int section_size; 407 408 section_size = (*ptr >> 16); 409 if (!section_size || section_size + 1 > count) 410 break; 411 if (*(ptr + 1) == fourcc) 412 { 413 *size = section_size; 414 return ptr + 2; 415 } 416 count -= section_size + 1; 417 ptr += section_size + 1; 418 } 419 return NULL; 420 } 421 422 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg) 423 { 424 static const enum pres_reg_tables reg_table[8] = 425 { 426 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT, 427 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP 428 }; 429 430 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT) 431 { 432 FIXME("Unsupported register table %#x.\n", *ptr); 433 return NULL; 434 } 435 436 reg->table = reg_table[*ptr++]; 437 reg->offset = *ptr++; 438 return ptr; 439 } 440 441 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr) 442 { 443 if (count < 3 || (*ptr && count < 5)) 444 { 445 WARN("Byte code buffer ends unexpectedly, count %u.\n", count); 446 return NULL; 447 } 448 449 if (*ptr) 450 { 451 if (*ptr != 1) 452 { 453 FIXME("Unknown relative addressing flag, word %#x.\n", *ptr); 454 return NULL; 455 } 456 ptr = parse_pres_reg(ptr + 1, &opr->index_reg); 457 if (!ptr) 458 return NULL; 459 } 460 else 461 { 462 opr->index_reg.table = PRES_REGTAB_COUNT; 463 ++ptr; 464 } 465 466 ptr = parse_pres_reg(ptr, &opr->reg); 467 468 if (opr->reg.table == PRES_REGTAB_OBCONST) 469 opr->reg.offset /= 4; 470 return ptr; 471 } 472 473 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins) 474 { 475 unsigned int ins_code, ins_raw; 476 unsigned int input_count; 477 unsigned int i; 478 479 if (count < 2) 480 { 481 WARN("Byte code buffer ends unexpectedly.\n"); 482 return NULL; 483 } 484 485 ins_raw = *ptr++; 486 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT; 487 ins->component_count = ins_raw & PRES_NCOMP_MASK; 488 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG); 489 490 if (ins->component_count < 1 || ins->component_count > 4) 491 { 492 FIXME("Unsupported number of components %u.\n", ins->component_count); 493 return NULL; 494 } 495 input_count = *ptr++; 496 count -= 2; 497 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i) 498 if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count) 499 break; 500 if (i == ARRAY_SIZE(pres_op_info)) 501 { 502 FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw); 503 return NULL; 504 } 505 ins->op = i; 506 if (input_count > ARRAY_SIZE(ins->inputs)) 507 { 508 FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count, 509 pres_op_info[i].mnem); 510 return NULL; 511 } 512 for (i = 0; i < input_count; ++i) 513 { 514 unsigned int *p; 515 516 p = parse_pres_arg(ptr, count, &ins->inputs[i]); 517 if (!p) 518 return NULL; 519 count -= p - ptr; 520 ptr = p; 521 } 522 ptr = parse_pres_arg(ptr, count, &ins->output); 523 if (ins->output.index_reg.table != PRES_REGTAB_COUNT) 524 { 525 FIXME("Relative addressing in output register not supported.\n"); 526 return NULL; 527 } 528 if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset 529 + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1)) 530 != get_reg_offset(ins->output.reg.table, ins->output.reg.offset)) 531 { 532 FIXME("Instructions outputting multiple registers are not supported.\n"); 533 return NULL; 534 } 535 return ptr; 536 } 537 538 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc, 539 WORD *constantinfo_reserved) 540 { 541 const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc); 542 543 if (!constant) 544 { 545 FIXME("Could not get constant desc.\n"); 546 if (constantinfo_reserved) 547 *constantinfo_reserved = 0; 548 return D3DERR_INVALIDCALL; 549 } 550 *desc = constant->desc; 551 if (constantinfo_reserved) 552 *constantinfo_reserved = constant->constantinfo_reserved; 553 return D3D_OK; 554 } 555 556 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set, 557 struct const_upload_info *info) 558 { 559 struct d3dx_parameter *param = const_set->param; 560 unsigned int table = const_set->table; 561 562 info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS) 563 || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS); 564 if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS) 565 { 566 info->major = param->columns; 567 info->minor = param->rows; 568 } 569 else 570 { 571 info->major = param->rows; 572 info->minor = param->columns; 573 } 574 575 if (get_reg_components(table) == 1) 576 { 577 unsigned int const_length = get_offset_reg(table, const_set->register_count); 578 579 info->major_stride = info->minor; 580 info->major_count = const_length / info->major_stride; 581 info->minor_remainder = const_length % info->major_stride; 582 } 583 else 584 { 585 info->major_stride = get_reg_components(table); 586 info->major_count = const_set->register_count; 587 info->minor_remainder = 0; 588 } 589 info->count = info->major_count * info->minor + info->minor_remainder; 590 } 591 592 #define INITIAL_CONST_SET_SIZE 16 593 594 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set) 595 { 596 if (const_tab->const_set_count >= const_tab->const_set_size) 597 { 598 unsigned int new_size; 599 struct d3dx_const_param_eval_output *new_alloc; 600 601 if (!const_tab->const_set_size) 602 { 603 new_size = INITIAL_CONST_SET_SIZE; 604 new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size); 605 if (!new_alloc) 606 { 607 ERR("Out of memory.\n"); 608 return E_OUTOFMEMORY; 609 } 610 } 611 else 612 { 613 new_size = const_tab->const_set_size * 2; 614 new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set, 615 sizeof(*const_tab->const_set) * new_size); 616 if (!new_alloc) 617 { 618 ERR("Out of memory.\n"); 619 return E_OUTOFMEMORY; 620 } 621 } 622 const_tab->const_set = new_alloc; 623 const_tab->const_set_size = new_size; 624 } 625 const_tab->const_set[const_tab->const_set_count++] = *set; 626 return D3D_OK; 627 } 628 629 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab, 630 struct d3dx_preshader *pres) 631 { 632 unsigned int i; 633 struct d3dx_const_param_eval_output const_set = {NULL}; 634 635 for (i = 0; i < pres->ins_count; ++i) 636 { 637 const struct d3dx_pres_ins *ins = &pres->ins[i]; 638 const struct d3dx_pres_reg *reg = &ins->output.reg; 639 640 if (reg->table == PRES_REGTAB_TEMP) 641 continue; 642 643 const_set.register_index = get_reg_offset(reg->table, reg->offset); 644 const_set.register_count = 1; 645 const_set.table = reg->table; 646 const_set.constant_class = D3DXPC_FORCE_DWORD; 647 const_set.element_count = 1; 648 append_const_set(const_tab, &const_set); 649 } 650 } 651 652 static int compare_const_set(const void *a, const void *b) 653 { 654 const struct d3dx_const_param_eval_output *r1 = a; 655 const struct d3dx_const_param_eval_output *r2 = b; 656 657 if (r1->table != r2->table) 658 return r1->table - r2->table; 659 return r1->register_index - r2->register_index; 660 } 661 662 static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab, 663 struct d3dx_parameter *param, unsigned int index) 664 { 665 unsigned int i, start_index = index; 666 DWORD *current_data; 667 enum pres_reg_tables current_table; 668 unsigned int current_start_offset, element_count; 669 struct d3dx_const_param_eval_output *first_const; 670 671 if (!const_tab->const_set_count) 672 return D3D_OK; 673 674 while (index < const_tab->const_set_count - 1) 675 { 676 first_const = &const_tab->const_set[index]; 677 current_data = first_const->param->data; 678 current_table = first_const->table; 679 current_start_offset = get_offset_reg(current_table, first_const->register_index); 680 element_count = 0; 681 for (i = index; i < const_tab->const_set_count; ++i) 682 { 683 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i]; 684 unsigned int count = get_offset_reg(const_set->table, 685 const_set->register_count * const_set->element_count); 686 unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index); 687 688 if (!(const_set->table == current_table && current_start_offset == start_offset 689 && const_set->direct_copy == first_const->direct_copy 690 && current_data == const_set->param->data 691 && (const_set->direct_copy || (first_const->param->type == const_set->param->type 692 && first_const->param->class == const_set->param->class 693 && first_const->param->columns == const_set->param->columns 694 && first_const->param->rows == const_set->param->rows 695 && first_const->register_count == const_set->register_count 696 && (i == const_tab->const_set_count - 1 697 || first_const->param->element_count == const_set->param->element_count))))) 698 break; 699 700 current_start_offset += count; 701 current_data += const_set->direct_copy ? count : const_set->param->rows 702 * const_set->param->columns * const_set->element_count; 703 element_count += const_set->element_count; 704 } 705 706 if (i > index + 1) 707 { 708 TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index, 709 debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy); 710 711 first_const->element_count = element_count; 712 if (first_const->direct_copy) 713 { 714 first_const->element_count = 1; 715 if (index == start_index 716 && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT)) 717 { 718 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 719 return D3DERR_INVALIDCALL; 720 first_const->param = param; 721 } 722 first_const->register_count = get_reg_offset(current_table, current_start_offset) 723 - first_const->register_index; 724 } 725 memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i], 726 sizeof(*const_tab->const_set) * (const_tab->const_set_count - i)); 727 const_tab->const_set_count -= i - index - 1; 728 } 729 else 730 { 731 TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n", 732 const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy); 733 } 734 index = i; 735 } 736 return D3D_OK; 737 } 738 739 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab, 740 D3DXHANDLE hc, struct d3dx_parameter *param) 741 { 742 D3DXCONSTANT_DESC desc; 743 unsigned int const_count, param_count, i; 744 BOOL get_element; 745 struct d3dx_const_param_eval_output const_set; 746 struct const_upload_info info; 747 enum pres_value_type table_type; 748 HRESULT hr; 749 750 if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL))) 751 return D3DERR_INVALIDCALL; 752 753 if (param->element_count) 754 { 755 param_count = param->element_count; 756 const_count = desc.Elements; 757 get_element = TRUE; 758 } 759 else 760 { 761 if (desc.Elements > 1) 762 { 763 FIXME("Unexpected number of constant elements %u.\n", desc.Elements); 764 return D3DERR_INVALIDCALL; 765 } 766 param_count = param->member_count; 767 const_count = desc.StructMembers; 768 get_element = FALSE; 769 } 770 if (const_count != param_count) 771 { 772 FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n", 773 param_count, const_count); 774 return D3DERR_INVALIDCALL; 775 } 776 if (const_count) 777 { 778 HRESULT ret = D3D_OK; 779 D3DXHANDLE hc_element; 780 unsigned int index = const_tab->const_set_count; 781 782 for (i = 0; i < const_count; ++i) 783 { 784 if (get_element) 785 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i); 786 else 787 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i); 788 if (!hc_element) 789 { 790 FIXME("Could not get constant.\n"); 791 hr = D3DERR_INVALIDCALL; 792 } 793 else 794 { 795 hr = init_set_constants_param(const_tab, ctab, hc_element, ¶m->members[i]); 796 } 797 if (FAILED(hr)) 798 ret = hr; 799 } 800 if (FAILED(ret)) 801 return ret; 802 return merge_const_set_entries(const_tab, param, index); 803 } 804 805 TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n", 806 debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes); 807 TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n", 808 debugstr_a(param->name), param->rows, param->columns, param->class, 809 param->flags, param->bytes); 810 811 const_set.element_count = 1; 812 const_set.param = param; 813 const_set.constant_class = desc.Class; 814 if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table)) 815 { 816 FIXME("Unknown register set %u.\n", desc.RegisterSet); 817 return D3DERR_INVALIDCALL; 818 } 819 const_set.register_index = desc.RegisterIndex; 820 const_set.table = const_tab->regset2table[desc.RegisterSet]; 821 if (const_set.table >= PRES_REGTAB_COUNT) 822 { 823 ERR("Unexpected register set %u.\n", desc.RegisterSet); 824 return D3DERR_INVALIDCALL; 825 } 826 assert(table_info[const_set.table].component_size == sizeof(unsigned int)); 827 assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int)); 828 const_set.register_count = desc.RegisterCount; 829 table_type = table_info[const_set.table].type; 830 get_const_upload_info(&const_set, &info); 831 if (!info.count) 832 { 833 TRACE("%s has zero count, skipping.\n", debugstr_a(param->name)); 834 return D3D_OK; 835 } 836 837 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 838 return D3DERR_INVALIDCALL; 839 840 const_set.direct_copy = table_type_from_param_type(param->type) == table_type 841 && !info.transpose && info.minor == info.major_stride 842 && info.count == get_offset_reg(const_set.table, const_set.register_count) 843 && info.count * sizeof(unsigned int) <= param->bytes; 844 if (info.minor_remainder && !const_set.direct_copy && !info.transpose) 845 FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n", 846 debugstr_a(param->name)); 847 848 if (info.major_count > info.major 849 || (info.major_count == info.major && info.minor_remainder)) 850 { 851 WARN("Constant dimensions exceed parameter size.\n"); 852 return D3DERR_INVALIDCALL; 853 } 854 855 if (FAILED(hr = append_const_set(const_tab, &const_set))) 856 return hr; 857 858 return D3D_OK; 859 } 860 861 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out, 862 struct d3dx9_base_effect *base, const char **skip_constants, 863 unsigned int skip_constants_count, struct d3dx_preshader *pres) 864 { 865 ID3DXConstantTable *ctab; 866 D3DXCONSTANT_DESC *cdesc; 867 struct d3dx_parameter **inputs_param; 868 D3DXCONSTANTTABLE_DESC desc; 869 HRESULT hr; 870 D3DXHANDLE hc; 871 unsigned int i, j; 872 873 hr = D3DXGetShaderConstantTable(byte_code, &ctab); 874 if (FAILED(hr) || !ctab) 875 { 876 TRACE("Could not get CTAB data, hr %#x.\n", hr); 877 /* returning OK, shaders and preshaders without CTAB are valid */ 878 return D3D_OK; 879 } 880 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc))) 881 { 882 FIXME("Could not get CTAB desc, hr %#x.\n", hr); 883 goto cleanup; 884 } 885 886 out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants); 887 out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants); 888 if (!cdesc || !inputs_param) 889 { 890 hr = E_OUTOFMEMORY; 891 goto cleanup; 892 } 893 894 for (i = 0; i < desc.Constants; ++i) 895 { 896 unsigned int index = out->input_count; 897 WORD constantinfo_reserved; 898 899 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i); 900 if (!hc) 901 { 902 FIXME("Null constant handle.\n"); 903 goto cleanup; 904 } 905 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved))) 906 goto cleanup; 907 inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name); 908 if (!inputs_param[index]) 909 { 910 WARN("Could not find parameter %s in effect.\n", cdesc[index].Name); 911 continue; 912 } 913 if (cdesc[index].Class == D3DXPC_OBJECT) 914 { 915 TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]); 916 if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT 917 || !is_param_type_sampler(inputs_param[index]->type)) 918 { 919 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name)); 920 hr = D3DERR_INVALIDCALL; 921 goto cleanup; 922 } 923 if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount) 924 { 925 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name)); 926 hr = D3DERR_INVALIDCALL; 927 goto cleanup; 928 } 929 } 930 if (!is_top_level_parameter(inputs_param[index])) 931 { 932 WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name)); 933 hr = E_FAIL; 934 goto cleanup; 935 } 936 937 for (j = 0; j < skip_constants_count; ++j) 938 { 939 if (!strcmp(cdesc[index].Name, skip_constants[j])) 940 { 941 if (!constantinfo_reserved) 942 { 943 WARN("skip_constants parameter %s is not register bound.\n", 944 cdesc[index].Name); 945 hr = D3DERR_INVALIDCALL; 946 goto cleanup; 947 } 948 TRACE("Skipping constant %s.\n", cdesc[index].Name); 949 break; 950 } 951 } 952 if (j < skip_constants_count) 953 continue; 954 ++out->input_count; 955 if (inputs_param[index]->class == D3DXPC_OBJECT) 956 continue; 957 if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index]))) 958 goto cleanup; 959 } 960 if (pres) 961 append_pres_const_sets_for_shader_input(out, pres); 962 if (out->const_set_count) 963 { 964 struct d3dx_const_param_eval_output *new_alloc; 965 966 qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set); 967 968 i = 0; 969 while (i < out->const_set_count - 1) 970 { 971 if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD 972 && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD 973 && out->const_set[i].table == out->const_set[i + 1].table 974 && out->const_set[i].register_index + out->const_set[i].register_count 975 >= out->const_set[i + 1].register_index) 976 { 977 assert(out->const_set[i].register_index + out->const_set[i].register_count 978 <= out->const_set[i + 1].register_index + 1); 979 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1 980 - out->const_set[i].register_index; 981 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i]) 982 * (out->const_set_count - i - 2)); 983 --out->const_set_count; 984 } 985 else 986 { 987 ++i; 988 } 989 } 990 991 new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set, 992 sizeof(*out->const_set) * out->const_set_count); 993 if (new_alloc) 994 { 995 out->const_set = new_alloc; 996 out->const_set_size = out->const_set_count; 997 } 998 else 999 { 1000 WARN("Out of memory.\n"); 1001 } 1002 } 1003 cleanup: 1004 ID3DXConstantTable_Release(ctab); 1005 return hr; 1006 } 1007 1008 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register) 1009 { 1010 if (table < PRES_REGTAB_COUNT) 1011 table_sizes[table] = max(table_sizes[table], max_register + 1); 1012 } 1013 1014 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab) 1015 { 1016 unsigned int i, table, max_register; 1017 1018 for (i = 0; i < ctab->input_count; ++i) 1019 { 1020 if (!ctab->inputs[i].RegisterCount) 1021 continue; 1022 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1; 1023 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1024 update_table_size(table_sizes, table, max_register); 1025 } 1026 } 1027 1028 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count) 1029 { 1030 static const char *xyzw_str = "xyzw"; 1031 unsigned int i, table; 1032 1033 table = arg->reg.table; 1034 if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT) 1035 { 1036 TRACE("("); 1037 for (i = 0; i < component_count; ++i) 1038 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e", 1039 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]); 1040 TRACE(")"); 1041 } 1042 else 1043 { 1044 if (arg->index_reg.table == PRES_REGTAB_COUNT) 1045 { 1046 TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset)); 1047 } 1048 else 1049 { 1050 unsigned int index_reg; 1051 1052 index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset); 1053 TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset), 1054 table_symbol[arg->index_reg.table], index_reg, 1055 xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]); 1056 } 1057 for (i = 0; i < component_count; ++i) 1058 TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]); 1059 } 1060 } 1061 1062 static void dump_registers(struct d3dx_const_tab *ctab) 1063 { 1064 unsigned int table, i; 1065 1066 for (i = 0; i < ctab->input_count; ++i) 1067 { 1068 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1069 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)", 1070 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount); 1071 } 1072 } 1073 1074 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins) 1075 { 1076 unsigned int i; 1077 1078 TRACE("%s ", pres_op_info[ins->op].mnem); 1079 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count); 1080 for (i = 0; i < pres_op_info[ins->op].input_count; ++i) 1081 { 1082 TRACE(", "); 1083 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count); 1084 } 1085 TRACE("\n"); 1086 } 1087 1088 static void dump_preshader(struct d3dx_preshader *pres) 1089 { 1090 unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4; 1091 const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED]; 1092 1093 if (immediate_count) 1094 TRACE("// Immediates:\n"); 1095 for (i = 0; i < immediate_count; ++i) 1096 { 1097 if (!(i % 4)) 1098 TRACE("// "); 1099 TRACE("%.8e", immediates[i]); 1100 if (i % 4 == 3) 1101 TRACE("\n"); 1102 else 1103 TRACE(", "); 1104 } 1105 TRACE("// Preshader registers:\n"); 1106 dump_registers(&pres->inputs); 1107 TRACE("preshader\n"); 1108 for (i = 0; i < pres->ins_count; ++i) 1109 dump_ins(&pres->regs, &pres->ins[i]); 1110 } 1111 1112 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base) 1113 { 1114 unsigned int *p; 1115 unsigned int i, j, const_count; 1116 double *dconst; 1117 HRESULT hr; 1118 unsigned int saved_word; 1119 unsigned int section_size; 1120 1121 TRACE("Preshader version %#x.\n", *ptr & 0xffff); 1122 1123 if (!count) 1124 { 1125 WARN("Unexpected end of byte code buffer.\n"); 1126 return D3DXERR_INVALIDDATA; 1127 } 1128 1129 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, §ion_size); 1130 if (p) 1131 { 1132 const_count = *p++; 1133 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int))) 1134 { 1135 WARN("Byte code buffer ends unexpectedly.\n"); 1136 return D3DXERR_INVALIDDATA; 1137 } 1138 dconst = (double *)p; 1139 } 1140 else 1141 { 1142 const_count = 0; 1143 dconst = NULL; 1144 } 1145 TRACE("%u double constants.\n", const_count); 1146 1147 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, §ion_size); 1148 if (!p) 1149 { 1150 WARN("Could not find preshader code.\n"); 1151 return D3D_OK; 1152 } 1153 pres->ins_count = *p++; 1154 --section_size; 1155 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins)) 1156 { 1157 WARN("Invalid instruction count %u.\n", pres->ins_count); 1158 return D3DXERR_INVALIDDATA; 1159 } 1160 TRACE("%u instructions.\n", pres->ins_count); 1161 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count); 1162 if (!pres->ins) 1163 return E_OUTOFMEMORY; 1164 for (i = 0; i < pres->ins_count; ++i) 1165 { 1166 unsigned int *ptr_next; 1167 1168 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]); 1169 if (!ptr_next) 1170 return D3DXERR_INVALIDDATA; 1171 section_size -= ptr_next - p; 1172 p = ptr_next; 1173 } 1174 1175 pres->inputs.regset2table = pres_regset2table; 1176 1177 saved_word = *ptr; 1178 *ptr = 0xfffe0000; 1179 hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL); 1180 *ptr = saved_word; 1181 if (FAILED(hr)) 1182 return hr; 1183 1184 if (const_count % get_reg_components(PRES_REGTAB_IMMED)) 1185 { 1186 FIXME("const_count %u is not a multiple of %u.\n", const_count, 1187 get_reg_components(PRES_REGTAB_IMMED)); 1188 return D3DXERR_INVALIDDATA; 1189 } 1190 pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count); 1191 1192 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs); 1193 for (i = 0; i < pres->ins_count; ++i) 1194 { 1195 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j) 1196 { 1197 enum pres_reg_tables table; 1198 unsigned int reg_idx; 1199 1200 if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT) 1201 { 1202 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0 1203 : pres->ins[i].component_count - 1; 1204 1205 table = pres->ins[i].inputs[j].reg.table; 1206 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset 1207 + last_component_index); 1208 } 1209 else 1210 { 1211 table = pres->ins[i].inputs[j].index_reg.table; 1212 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset); 1213 } 1214 if (reg_idx >= pres->regs.table_sizes[table]) 1215 { 1216 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u.\n", 1217 i, j, table, reg_idx); 1218 return D3DXERR_INVALIDDATA; 1219 } 1220 } 1221 update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table, 1222 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset)); 1223 } 1224 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED))) 1225 return E_OUTOFMEMORY; 1226 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count); 1227 1228 return D3D_OK; 1229 } 1230 1231 HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size, 1232 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter, 1233 const char **skip_constants, unsigned int skip_constants_count) 1234 { 1235 struct d3dx_param_eval *peval; 1236 unsigned int *ptr, *shader_ptr = NULL; 1237 unsigned int i; 1238 BOOL shader; 1239 unsigned int count, pres_size; 1240 HRESULT ret; 1241 1242 TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n", 1243 base_effect, byte_code, byte_code_size, type, peval_out); 1244 1245 count = byte_code_size / sizeof(unsigned int); 1246 if (!byte_code || !count) 1247 { 1248 *peval_out = NULL; 1249 return D3D_OK; 1250 } 1251 1252 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval)); 1253 if (!peval) 1254 { 1255 ret = E_OUTOFMEMORY; 1256 goto err_out; 1257 } 1258 peval->version_counter = version_counter; 1259 1260 peval->param_type = type; 1261 switch (type) 1262 { 1263 case D3DXPT_VERTEXSHADER: 1264 case D3DXPT_PIXELSHADER: 1265 shader = TRUE; 1266 break; 1267 default: 1268 shader = FALSE; 1269 break; 1270 } 1271 peval->shader_inputs.regset2table = shad_regset2table; 1272 1273 ptr = (unsigned int *)byte_code; 1274 if (shader) 1275 { 1276 if ((*ptr & 0xfffe0000) != 0xfffe0000) 1277 { 1278 FIXME("Invalid shader signature %#x.\n", *ptr); 1279 ret = D3DXERR_INVALIDDATA; 1280 goto err_out; 1281 } 1282 TRACE("Shader version %#x.\n", *ptr & 0xffff); 1283 shader_ptr = ptr; 1284 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size); 1285 if (!ptr) 1286 TRACE("No preshader found.\n"); 1287 } 1288 else 1289 { 1290 pres_size = count; 1291 } 1292 1293 if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect))) 1294 { 1295 FIXME("Failed parsing preshader, byte code for analysis follows.\n"); 1296 dump_bytecode(byte_code, byte_code_size); 1297 goto err_out; 1298 } 1299 1300 if (shader) 1301 { 1302 if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect, 1303 skip_constants, skip_constants_count, &peval->pres))) 1304 { 1305 TRACE("Could not get shader constant table, hr %#x.\n", ret); 1306 goto err_out; 1307 } 1308 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs); 1309 } 1310 1311 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i) 1312 { 1313 if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i))) 1314 goto err_out; 1315 } 1316 1317 if (TRACE_ON(d3dx)) 1318 { 1319 dump_bytecode(byte_code, byte_code_size); 1320 dump_preshader(&peval->pres); 1321 if (shader) 1322 { 1323 TRACE("// Shader registers:\n"); 1324 dump_registers(&peval->shader_inputs); 1325 } 1326 } 1327 *peval_out = peval; 1328 TRACE("Created parameter evaluator %p.\n", *peval_out); 1329 return D3D_OK; 1330 1331 err_out: 1332 WARN("Error creating parameter evaluator.\n"); 1333 if (TRACE_ON(d3dx)) 1334 dump_bytecode(byte_code, byte_code_size); 1335 1336 d3dx_free_param_eval(peval); 1337 *peval_out = NULL; 1338 return ret; 1339 } 1340 1341 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab) 1342 { 1343 HeapFree(GetProcessHeap(), 0, ctab->inputs); 1344 HeapFree(GetProcessHeap(), 0, ctab->inputs_param); 1345 HeapFree(GetProcessHeap(), 0, ctab->const_set); 1346 } 1347 1348 static void d3dx_free_preshader(struct d3dx_preshader *pres) 1349 { 1350 HeapFree(GetProcessHeap(), 0, pres->ins); 1351 1352 regstore_free_tables(&pres->regs); 1353 d3dx_free_const_tab(&pres->inputs); 1354 } 1355 1356 void d3dx_free_param_eval(struct d3dx_param_eval *peval) 1357 { 1358 TRACE("peval %p.\n", peval); 1359 1360 if (!peval) 1361 return; 1362 1363 d3dx_free_preshader(&peval->pres); 1364 d3dx_free_const_tab(&peval->shader_inputs); 1365 HeapFree(GetProcessHeap(), 0, peval); 1366 } 1367 1368 static void pres_int_from_float(void *out, const void *in, unsigned int count) 1369 { 1370 unsigned int i; 1371 const float *in_float = in; 1372 int *out_int = out; 1373 1374 for (i = 0; i < count; ++i) 1375 out_int[i] = in_float[i]; 1376 } 1377 1378 static void pres_bool_from_value(void *out, const void *in, unsigned int count) 1379 { 1380 unsigned int i; 1381 const DWORD *in_dword = in; 1382 BOOL *out_bool = out; 1383 1384 for (i = 0; i < count; ++i) 1385 out_bool[i] = !!in_dword[i]; 1386 } 1387 1388 static void pres_float_from_int(void *out, const void *in, unsigned int count) 1389 { 1390 unsigned int i; 1391 const int *in_int = in; 1392 float *out_float = out; 1393 1394 for (i = 0; i < count; ++i) 1395 out_float[i] = in_int[i]; 1396 } 1397 1398 static void pres_float_from_bool(void *out, const void *in, unsigned int count) 1399 { 1400 unsigned int i; 1401 const BOOL *in_bool = in; 1402 float *out_float = out; 1403 1404 for (i = 0; i < count; ++i) 1405 out_float[i] = !!in_bool[i]; 1406 } 1407 1408 static void pres_int_from_bool(void *out, const void *in, unsigned int count) 1409 { 1410 unsigned int i; 1411 const float *in_bool = in; 1412 int *out_int = out; 1413 1414 for (i = 0; i < count; ++i) 1415 out_int[i] = !!in_bool[i]; 1416 } 1417 1418 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table, 1419 unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type) 1420 { 1421 typedef void (*conv_func)(void *out, const void *in, unsigned int count); 1422 static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] = 1423 { 1424 {NULL, NULL, pres_int_from_float, pres_bool_from_value}, 1425 {NULL, NULL, NULL, NULL}, 1426 {pres_float_from_int, NULL, NULL, pres_bool_from_value}, 1427 {pres_float_from_bool, NULL, pres_int_from_bool, NULL} 1428 }; 1429 enum pres_value_type table_type = table_info[table].type; 1430 1431 if (param_type == table_type) 1432 { 1433 regstore_set_values(rs, table, in, offset, count); 1434 return; 1435 } 1436 1437 set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count); 1438 } 1439 1440 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1441 D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr, 1442 unsigned int start, unsigned int count) 1443 { 1444 if (type == D3DXPT_VERTEXSHADER) 1445 { 1446 switch(table) 1447 { 1448 case PRES_REGTAB_OCONST: 1449 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count); 1450 case PRES_REGTAB_OICONST: 1451 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count); 1452 case PRES_REGTAB_OBCONST: 1453 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count); 1454 default: 1455 FIXME("Unexpected register table %u.\n", table); 1456 return D3DERR_INVALIDCALL; 1457 } 1458 } 1459 else if (type == D3DXPT_PIXELSHADER) 1460 { 1461 switch(table) 1462 { 1463 case PRES_REGTAB_OCONST: 1464 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count); 1465 case PRES_REGTAB_OICONST: 1466 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count); 1467 case PRES_REGTAB_OBCONST: 1468 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count); 1469 default: 1470 FIXME("Unexpected register table %u.\n", table); 1471 return D3DERR_INVALIDCALL; 1472 } 1473 } 1474 else 1475 { 1476 FIXME("Unexpected parameter type %u.\n", type); 1477 return D3DERR_INVALIDCALL; 1478 } 1479 } 1480 1481 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab, 1482 ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1483 D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty) 1484 { 1485 unsigned int const_idx; 1486 unsigned int current_start = 0, current_count = 0; 1487 enum pres_reg_tables current_table = PRES_REGTAB_COUNT; 1488 BOOL update_device = manager || device; 1489 HRESULT hr, result = D3D_OK; 1490 ULONG64 update_version = const_tab->update_version; 1491 1492 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1493 { 1494 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1495 enum pres_reg_tables table = const_set->table; 1496 struct d3dx_parameter *param = const_set->param; 1497 unsigned int element, i, j, start_offset; 1498 struct const_upload_info info; 1499 unsigned int *data; 1500 enum pres_value_type param_type; 1501 1502 if (!(param && is_param_dirty(param, update_version))) 1503 continue; 1504 1505 data = param->data; 1506 start_offset = get_offset_reg(table, const_set->register_index); 1507 if (const_set->direct_copy) 1508 { 1509 regstore_set_values(rs, table, data, start_offset, 1510 get_offset_reg(table, const_set->register_count)); 1511 continue; 1512 } 1513 param_type = table_type_from_param_type(param->type); 1514 if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR) 1515 { 1516 unsigned int count = max(param->rows, param->columns); 1517 1518 if (count >= get_reg_components(table)) 1519 { 1520 regstore_set_data(rs, table, start_offset, data, 1521 count * const_set->element_count, param_type); 1522 } 1523 else 1524 { 1525 for (element = 0; element < const_set->element_count; ++element) 1526 regstore_set_data(rs, table, start_offset + get_offset_reg(table, element), 1527 &data[element * count], count, param_type); 1528 } 1529 continue; 1530 } 1531 get_const_upload_info(const_set, &info); 1532 for (element = 0; element < const_set->element_count; ++element) 1533 { 1534 unsigned int *out = (unsigned int *)rs->tables[table] + start_offset; 1535 1536 /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer. 1537 * All the supported types of parameters and table values have the same size. */ 1538 if (info.transpose) 1539 { 1540 for (i = 0; i < info.major_count; ++i) 1541 for (j = 0; j < info.minor; ++j) 1542 out[i * info.major_stride + j] = data[i + j * info.major]; 1543 1544 for (j = 0; j < info.minor_remainder; ++j) 1545 out[i * info.major_stride + j] = data[i + j * info.major]; 1546 } 1547 else 1548 { 1549 for (i = 0; i < info.major_count; ++i) 1550 for (j = 0; j < info.minor; ++j) 1551 out[i * info.major_stride + j] = data[i * info.minor + j]; 1552 } 1553 start_offset += get_offset_reg(table, const_set->register_count); 1554 data += param->rows * param->columns; 1555 } 1556 start_offset = get_offset_reg(table, const_set->register_index); 1557 if (table_info[table].type != param_type) 1558 regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset, 1559 get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type); 1560 } 1561 const_tab->update_version = new_update_version; 1562 if (!update_device) 1563 return D3D_OK; 1564 1565 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1566 { 1567 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1568 1569 if (device_update_all || (const_set->param 1570 ? is_param_dirty(const_set->param, update_version) : pres_dirty)) 1571 { 1572 enum pres_reg_tables table = const_set->table; 1573 1574 if (table == current_table && current_start + current_count == const_set->register_index) 1575 { 1576 current_count += const_set->register_count * const_set->element_count; 1577 } 1578 else 1579 { 1580 if (current_count) 1581 { 1582 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1583 (DWORD *)rs->tables[current_table] 1584 + get_offset_reg(current_table, current_start), current_start, current_count))) 1585 result = hr; 1586 } 1587 current_table = table; 1588 current_start = const_set->register_index; 1589 current_count = const_set->register_count * const_set->element_count; 1590 } 1591 } 1592 } 1593 if (current_count) 1594 { 1595 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1596 (DWORD *)rs->tables[current_table] 1597 + get_offset_reg(current_table, current_start), current_start, current_count))) 1598 result = hr; 1599 } 1600 return result; 1601 } 1602 1603 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset) 1604 { 1605 return regstore_get_double(rs, table, offset); 1606 } 1607 1608 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp) 1609 { 1610 unsigned int offset, base_index, reg_index, table; 1611 1612 table = opr->reg.table; 1613 1614 if (opr->index_reg.table == PRES_REGTAB_COUNT) 1615 base_index = 0; 1616 else 1617 base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset)); 1618 1619 offset = get_offset_reg(table, base_index) + opr->reg.offset + comp; 1620 reg_index = get_reg_offset(table, offset); 1621 1622 if (reg_index >= rs->table_sizes[table]) 1623 { 1624 unsigned int wrap_size; 1625 1626 if (table == PRES_REGTAB_CONST) 1627 { 1628 /* As it can be guessed from tests, offset into floating constant table is wrapped 1629 * to the nearest power of 2 and not to the actual table size. */ 1630 for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1) 1631 ; 1632 } 1633 else 1634 { 1635 wrap_size = rs->table_sizes[table]; 1636 } 1637 WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n", 1638 reg_index, table, wrap_size, rs->table_sizes[table]); 1639 reg_index %= wrap_size; 1640 1641 if (reg_index >= rs->table_sizes[table]) 1642 return 0.0; 1643 1644 offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table); 1645 } 1646 1647 return exec_get_reg_value(rs, table, offset); 1648 } 1649 1650 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg, 1651 unsigned int comp, double res) 1652 { 1653 regstore_set_double(rs, reg->table, reg->offset + comp, res); 1654 } 1655 1656 #define ARGS_ARRAY_SIZE 8 1657 static HRESULT execute_preshader(struct d3dx_preshader *pres) 1658 { 1659 unsigned int i, j, k; 1660 double args[ARGS_ARRAY_SIZE]; 1661 double res; 1662 1663 for (i = 0; i < pres->ins_count; ++i) 1664 { 1665 const struct d3dx_pres_ins *ins; 1666 const struct op_info *oi; 1667 1668 ins = &pres->ins[i]; 1669 oi = &pres_op_info[ins->op]; 1670 if (oi->func_all_comps) 1671 { 1672 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE) 1673 { 1674 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count); 1675 return E_FAIL; 1676 } 1677 for (k = 0; k < oi->input_count; ++k) 1678 for (j = 0; j < ins->component_count; ++j) 1679 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k], 1680 ins->scalar_op && !k ? 0 : j); 1681 res = oi->func(args, ins->component_count); 1682 1683 /* only 'dot' instruction currently falls here */ 1684 exec_set_arg(&pres->regs, &ins->output.reg, 0, res); 1685 } 1686 else 1687 { 1688 for (j = 0; j < ins->component_count; ++j) 1689 { 1690 for (k = 0; k < oi->input_count; ++k) 1691 args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j); 1692 res = oi->func(args, ins->component_count); 1693 exec_set_arg(&pres->regs, &ins->output.reg, j, res); 1694 } 1695 } 1696 } 1697 return D3D_OK; 1698 } 1699 1700 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version) 1701 { 1702 unsigned int i; 1703 1704 if (update_version == ULONG64_MAX) 1705 update_version = ctab->update_version; 1706 for (i = 0; i < ctab->input_count; ++i) 1707 { 1708 if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]), 1709 update_version)) 1710 return TRUE; 1711 } 1712 return FALSE; 1713 } 1714 1715 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version) 1716 { 1717 return is_const_tab_input_dirty(&peval->pres.inputs, update_version) 1718 || is_const_tab_input_dirty(&peval->shader_inputs, update_version); 1719 } 1720 1721 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param, 1722 void *param_value) 1723 { 1724 HRESULT hr; 1725 unsigned int i; 1726 unsigned int elements, elements_param, elements_table; 1727 float *oc; 1728 1729 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value); 1730 1731 if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX)) 1732 { 1733 set_constants(&peval->pres.regs, &peval->pres.inputs, 1734 next_update_version(peval->version_counter), 1735 NULL, NULL, peval->param_type, FALSE, FALSE); 1736 1737 if (FAILED(hr = execute_preshader(&peval->pres))) 1738 return hr; 1739 } 1740 1741 elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]); 1742 elements_param = param->bytes / sizeof(unsigned int); 1743 elements = min(elements_table, elements_param); 1744 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST]; 1745 for (i = 0; i < elements; ++i) 1746 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT); 1747 return D3D_OK; 1748 } 1749 1750 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1751 struct d3dx_param_eval *peval, BOOL update_all) 1752 { 1753 HRESULT hr; 1754 struct d3dx_preshader *pres = &peval->pres; 1755 struct d3dx_regstore *rs = &pres->regs; 1756 ULONG64 new_update_version = next_update_version(peval->version_counter); 1757 BOOL pres_dirty = FALSE; 1758 1759 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type); 1760 1761 if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX)) 1762 { 1763 set_constants(rs, &pres->inputs, new_update_version, 1764 NULL, NULL, peval->param_type, FALSE, FALSE); 1765 if (FAILED(hr = execute_preshader(pres))) 1766 return hr; 1767 pres_dirty = TRUE; 1768 } 1769 1770 return set_constants(rs, &peval->shader_inputs, new_update_version, 1771 manager, device, peval->param_type, update_all, pres_dirty); 1772 } 1773