1 /* 2 * Copyright 2016 Paul Gofman 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with this library; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 17 */ 18 19 #include "config.h" 20 #include "wine/port.h" 21 22 #include "d3dx9_private.h" 23 24 #include <float.h> 25 #include <assert.h> 26 27 WINE_DEFAULT_DEBUG_CHANNEL(d3dx); 28 29 #ifdef __REACTOS__ 30 /* ReactOS FIXME: Insect */ 31 #define fmin min 32 #define fmax max 33 #endif 34 35 enum pres_ops 36 { 37 PRESHADER_OP_NOP, 38 PRESHADER_OP_MOV, 39 PRESHADER_OP_NEG, 40 PRESHADER_OP_RCP, 41 PRESHADER_OP_FRC, 42 PRESHADER_OP_EXP, 43 PRESHADER_OP_LOG, 44 PRESHADER_OP_RSQ, 45 PRESHADER_OP_SIN, 46 PRESHADER_OP_COS, 47 PRESHADER_OP_ASIN, 48 PRESHADER_OP_ACOS, 49 PRESHADER_OP_ATAN, 50 PRESHADER_OP_MIN, 51 PRESHADER_OP_MAX, 52 PRESHADER_OP_LT, 53 PRESHADER_OP_GE, 54 PRESHADER_OP_ADD, 55 PRESHADER_OP_MUL, 56 PRESHADER_OP_ATAN2, 57 PRESHADER_OP_DIV, 58 PRESHADER_OP_CMP, 59 PRESHADER_OP_DOT, 60 PRESHADER_OP_DOTSWIZ6, 61 PRESHADER_OP_DOTSWIZ8, 62 }; 63 64 typedef double (*pres_op_func)(double *args, int n); 65 66 static double to_signed_nan(double v) 67 { 68 static const union 69 { 70 ULONG64 ulong64_value; 71 double double_value; 72 } 73 signed_nan = 74 { 75 0xfff8000000000000 76 }; 77 78 return isnan(v) ? signed_nan.double_value : v; 79 } 80 81 static double pres_mov(double *args, int n) {return args[0];} 82 static double pres_add(double *args, int n) {return args[0] + args[1];} 83 static double pres_mul(double *args, int n) {return args[0] * args[1];} 84 static double pres_dot(double *args, int n) 85 { 86 int i; 87 double sum; 88 89 sum = 0.0; 90 for (i = 0; i < n; ++i) 91 sum += args[i] * args[i + n]; 92 return sum; 93 } 94 95 static double pres_dotswiz6(double *args, int n) 96 { 97 return pres_dot(args, 3); 98 } 99 100 static double pres_dotswiz8(double *args, int n) 101 { 102 return pres_dot(args, 4); 103 } 104 105 static double pres_neg(double *args, int n) {return -args[0];} 106 static double pres_rcp(double *args, int n) {return 1.0 / args[0];} 107 static double pres_lt(double *args, int n) {return args[0] < args[1] ? 1.0 : 0.0;} 108 static double pres_ge(double *args, int n) {return args[0] >= args[1] ? 1.0 : 0.0;} 109 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);} 110 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);} 111 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);} 112 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];} 113 static double pres_sin(double *args, int n) {return sin(args[0]);} 114 static double pres_cos(double *args, int n) {return cos(args[0]);} 115 static double pres_rsq(double *args, int n) 116 { 117 double v; 118 119 v = fabs(args[0]); 120 if (v == 0.0) 121 return INFINITY; 122 else 123 return 1.0 / sqrt(v); 124 } 125 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);} 126 static double pres_log(double *args, int n) 127 { 128 double v; 129 130 v = fabs(args[0]); 131 if (v == 0.0) 132 return 0.0; 133 else 134 #ifdef HAVE_LOG2 135 return log2(v); 136 #else 137 return log(v) / log(2); 138 #endif 139 } 140 static double pres_asin(double *args, int n) {return to_signed_nan(asin(args[0]));} 141 static double pres_acos(double *args, int n) {return to_signed_nan(acos(args[0]));} 142 static double pres_atan(double *args, int n) {return atan(args[0]);} 143 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);} 144 145 /* According to the test results 'div' operation always returns 0. Compiler does not seem to ever 146 * generate it, using rcp + mul instead, so probably it is not implemented in native d3dx. */ 147 static double pres_div(double *args, int n) {return 0.0;} 148 149 #define PRES_OPCODE_MASK 0x7ff00000 150 #define PRES_OPCODE_SHIFT 20 151 #define PRES_SCALAR_FLAG 0x80000000 152 #define PRES_NCOMP_MASK 0x0000ffff 153 154 #define FOURCC_PRES 0x53455250 155 #define FOURCC_CLIT 0x54494c43 156 #define FOURCC_FXLC 0x434c5846 157 #define FOURCC_PRSI 0x49535250 158 #define PRES_SIGN 0x46580000 159 160 struct op_info 161 { 162 unsigned int opcode; 163 char mnem[16]; 164 unsigned int input_count; 165 BOOL func_all_comps; 166 pres_op_func func; 167 }; 168 169 static const struct op_info pres_op_info[] = 170 { 171 {0x000, "nop", 0, 0, NULL }, /* PRESHADER_OP_NOP */ 172 {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */ 173 {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */ 174 {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */ 175 {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */ 176 {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */ 177 {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */ 178 {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */ 179 {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */ 180 {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */ 181 {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */ 182 {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */ 183 {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */ 184 {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */ 185 {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */ 186 {0x202, "lt", 2, 0, pres_lt }, /* PRESHADER_OP_LT */ 187 {0x203, "ge", 2, 0, pres_ge }, /* PRESHADER_OP_GE */ 188 {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */ 189 {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */ 190 {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */ 191 {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */ 192 {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */ 193 {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */ 194 {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */ 195 {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */ 196 }; 197 198 enum pres_value_type 199 { 200 PRES_VT_FLOAT, 201 PRES_VT_DOUBLE, 202 PRES_VT_INT, 203 PRES_VT_BOOL, 204 PRES_VT_COUNT 205 }; 206 207 static const struct 208 { 209 unsigned int component_size; 210 enum pres_value_type type; 211 } 212 table_info[] = 213 { 214 {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */ 215 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */ 216 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */ 217 {sizeof(BOOL), PRES_VT_BOOL }, /* PRES_REGTAB_OBCONST */ 218 {sizeof(int), PRES_VT_INT, }, /* PRES_REGTAB_OICONST */ 219 /* TODO: use double precision for 64 bit */ 220 {sizeof(float), PRES_VT_FLOAT } /* PRES_REGTAB_TEMP */ 221 }; 222 223 static const char *table_symbol[] = 224 { 225 "imm", "c", "oc", "ob", "oi", "r", "(null)", 226 }; 227 228 static const enum pres_reg_tables pres_regset2table[] = 229 { 230 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 231 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 232 PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */ 233 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 234 }; 235 236 static const enum pres_reg_tables shad_regset2table[] = 237 { 238 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 239 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 240 PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */ 241 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 242 }; 243 244 struct d3dx_pres_reg 245 { 246 enum pres_reg_tables table; 247 /* offset is component index, not register index, e. g. 248 offset for component c3.y is 13 (3 * 4 + 1) */ 249 unsigned int offset; 250 }; 251 252 struct d3dx_pres_operand 253 { 254 struct d3dx_pres_reg reg; 255 struct d3dx_pres_reg index_reg; 256 }; 257 258 #define MAX_INPUTS_COUNT 8 259 260 struct d3dx_pres_ins 261 { 262 enum pres_ops op; 263 /* first input argument is scalar, 264 scalar component is propagated */ 265 BOOL scalar_op; 266 unsigned int component_count; 267 struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT]; 268 struct d3dx_pres_operand output; 269 }; 270 271 struct const_upload_info 272 { 273 BOOL transpose; 274 unsigned int major, minor; 275 unsigned int major_stride; 276 unsigned int major_count; 277 unsigned int count; 278 unsigned int minor_remainder; 279 }; 280 281 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type) 282 { 283 switch (type) 284 { 285 case D3DXPT_FLOAT: 286 return PRES_VT_FLOAT; 287 case D3DXPT_INT: 288 return PRES_VT_INT; 289 case D3DXPT_BOOL: 290 return PRES_VT_BOOL; 291 default: 292 FIXME("Unsupported type %u.\n", type); 293 return PRES_VT_COUNT; 294 } 295 } 296 297 static unsigned int get_reg_offset(unsigned int table, unsigned int offset) 298 { 299 return table == PRES_REGTAB_OBCONST ? offset : offset >> 2; 300 } 301 302 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx) 303 { 304 return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2; 305 } 306 307 static unsigned int get_reg_components(unsigned int table) 308 { 309 return get_offset_reg(table, 1); 310 } 311 312 #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8) 313 314 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table) 315 { 316 unsigned int size; 317 318 size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size; 319 if (size) 320 { 321 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size); 322 if (!rs->tables[table]) 323 return E_OUTOFMEMORY; 324 } 325 return D3D_OK; 326 } 327 328 static void regstore_free_tables(struct d3dx_regstore *rs) 329 { 330 unsigned int i; 331 332 for (i = 0; i < PRES_REGTAB_COUNT; ++i) 333 { 334 HeapFree(GetProcessHeap(), 0, rs->tables[i]); 335 } 336 } 337 338 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data, 339 unsigned int start_offset, unsigned int count) 340 { 341 BYTE *dst = rs->tables[table]; 342 const BYTE *src = data; 343 unsigned int size; 344 345 dst += start_offset * table_info[table].component_size; 346 size = count * table_info[table].component_size; 347 assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst)); 348 memcpy(dst, src, size); 349 } 350 351 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset) 352 { 353 BYTE *p; 354 355 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 356 switch (table_info[table].type) 357 { 358 case PRES_VT_FLOAT: 359 return *(float *)p; 360 case PRES_VT_DOUBLE: 361 return *(double *)p; 362 default: 363 FIXME("Unexpected preshader input from table %u.\n", table); 364 return NAN; 365 } 366 } 367 368 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v) 369 { 370 BYTE *p; 371 372 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 373 switch (table_info[table].type) 374 { 375 case PRES_VT_FLOAT : *(float *)p = v; break; 376 case PRES_VT_DOUBLE: *(double *)p = v; break; 377 case PRES_VT_INT : *(int *)p = lrint(v); break; 378 case PRES_VT_BOOL : *(BOOL *)p = !!v; break; 379 default: 380 FIXME("Bad type %u.\n", table_info[table].type); 381 break; 382 } 383 } 384 385 static void dump_bytecode(void *data, unsigned int size) 386 { 387 unsigned int *bytecode = (unsigned int *)data; 388 unsigned int i, j, n; 389 390 size /= sizeof(*bytecode); 391 i = 0; 392 while (i < size) 393 { 394 n = min(size - i, 8); 395 for (j = 0; j < n; ++j) 396 TRACE("0x%08x,", bytecode[i + j]); 397 i += n; 398 TRACE("\n"); 399 } 400 } 401 402 static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count, 403 unsigned int fourcc, unsigned int *size) 404 { 405 /* Provide at least one value in comment section on non-NULL return. */ 406 while (count > 2 && (*ptr & 0xffff) == 0xfffe) 407 { 408 unsigned int section_size; 409 410 section_size = (*ptr >> 16); 411 if (!section_size || section_size + 1 > count) 412 break; 413 if (*(ptr + 1) == fourcc) 414 { 415 *size = section_size; 416 return ptr + 2; 417 } 418 count -= section_size + 1; 419 ptr += section_size + 1; 420 } 421 return NULL; 422 } 423 424 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg) 425 { 426 static const enum pres_reg_tables reg_table[8] = 427 { 428 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT, 429 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP 430 }; 431 432 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT) 433 { 434 FIXME("Unsupported register table %#x.\n", *ptr); 435 return NULL; 436 } 437 438 reg->table = reg_table[*ptr++]; 439 reg->offset = *ptr++; 440 return ptr; 441 } 442 443 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr) 444 { 445 if (count < 3 || (*ptr && count < 5)) 446 { 447 WARN("Byte code buffer ends unexpectedly, count %u.\n", count); 448 return NULL; 449 } 450 451 if (*ptr) 452 { 453 if (*ptr != 1) 454 { 455 FIXME("Unknown relative addressing flag, word %#x.\n", *ptr); 456 return NULL; 457 } 458 ptr = parse_pres_reg(ptr + 1, &opr->index_reg); 459 if (!ptr) 460 return NULL; 461 } 462 else 463 { 464 opr->index_reg.table = PRES_REGTAB_COUNT; 465 ++ptr; 466 } 467 468 ptr = parse_pres_reg(ptr, &opr->reg); 469 470 if (opr->reg.table == PRES_REGTAB_OBCONST) 471 opr->reg.offset /= 4; 472 return ptr; 473 } 474 475 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins) 476 { 477 unsigned int ins_code, ins_raw; 478 unsigned int input_count; 479 unsigned int i; 480 481 if (count < 2) 482 { 483 WARN("Byte code buffer ends unexpectedly.\n"); 484 return NULL; 485 } 486 487 ins_raw = *ptr++; 488 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT; 489 ins->component_count = ins_raw & PRES_NCOMP_MASK; 490 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG); 491 492 if (ins->component_count < 1 || ins->component_count > 4) 493 { 494 FIXME("Unsupported number of components %u.\n", ins->component_count); 495 return NULL; 496 } 497 input_count = *ptr++; 498 count -= 2; 499 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i) 500 if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count) 501 break; 502 if (i == ARRAY_SIZE(pres_op_info)) 503 { 504 FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw); 505 return NULL; 506 } 507 ins->op = i; 508 if (input_count > ARRAY_SIZE(ins->inputs)) 509 { 510 FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count, 511 pres_op_info[i].mnem); 512 return NULL; 513 } 514 for (i = 0; i < input_count; ++i) 515 { 516 unsigned int *p; 517 518 p = parse_pres_arg(ptr, count, &ins->inputs[i]); 519 if (!p) 520 return NULL; 521 count -= p - ptr; 522 ptr = p; 523 } 524 ptr = parse_pres_arg(ptr, count, &ins->output); 525 if (ins->output.index_reg.table != PRES_REGTAB_COUNT) 526 { 527 FIXME("Relative addressing in output register not supported.\n"); 528 return NULL; 529 } 530 if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset 531 + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1)) 532 != get_reg_offset(ins->output.reg.table, ins->output.reg.offset)) 533 { 534 FIXME("Instructions outputting multiple registers are not supported.\n"); 535 return NULL; 536 } 537 return ptr; 538 } 539 540 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc, 541 WORD *constantinfo_reserved) 542 { 543 const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc); 544 545 if (!constant) 546 { 547 FIXME("Could not get constant desc.\n"); 548 if (constantinfo_reserved) 549 *constantinfo_reserved = 0; 550 return D3DERR_INVALIDCALL; 551 } 552 *desc = constant->desc; 553 if (constantinfo_reserved) 554 *constantinfo_reserved = constant->constantinfo_reserved; 555 return D3D_OK; 556 } 557 558 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set, 559 struct const_upload_info *info) 560 { 561 struct d3dx_parameter *param = const_set->param; 562 unsigned int table = const_set->table; 563 564 info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS) 565 || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS); 566 if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS) 567 { 568 info->major = param->columns; 569 info->minor = param->rows; 570 } 571 else 572 { 573 info->major = param->rows; 574 info->minor = param->columns; 575 } 576 577 if (get_reg_components(table) == 1) 578 { 579 unsigned int const_length = get_offset_reg(table, const_set->register_count); 580 581 info->major_stride = info->minor; 582 info->major_count = const_length / info->major_stride; 583 info->minor_remainder = const_length % info->major_stride; 584 } 585 else 586 { 587 info->major_stride = get_reg_components(table); 588 info->major_count = const_set->register_count; 589 info->minor_remainder = 0; 590 } 591 info->count = info->major_count * info->minor + info->minor_remainder; 592 } 593 594 #define INITIAL_CONST_SET_SIZE 16 595 596 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set) 597 { 598 if (const_tab->const_set_count >= const_tab->const_set_size) 599 { 600 unsigned int new_size; 601 struct d3dx_const_param_eval_output *new_alloc; 602 603 if (!const_tab->const_set_size) 604 { 605 new_size = INITIAL_CONST_SET_SIZE; 606 new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size); 607 if (!new_alloc) 608 { 609 ERR("Out of memory.\n"); 610 return E_OUTOFMEMORY; 611 } 612 } 613 else 614 { 615 new_size = const_tab->const_set_size * 2; 616 new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set, 617 sizeof(*const_tab->const_set) * new_size); 618 if (!new_alloc) 619 { 620 ERR("Out of memory.\n"); 621 return E_OUTOFMEMORY; 622 } 623 } 624 const_tab->const_set = new_alloc; 625 const_tab->const_set_size = new_size; 626 } 627 const_tab->const_set[const_tab->const_set_count++] = *set; 628 return D3D_OK; 629 } 630 631 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab, 632 struct d3dx_preshader *pres) 633 { 634 unsigned int i; 635 struct d3dx_const_param_eval_output const_set = {NULL}; 636 637 for (i = 0; i < pres->ins_count; ++i) 638 { 639 const struct d3dx_pres_ins *ins = &pres->ins[i]; 640 const struct d3dx_pres_reg *reg = &ins->output.reg; 641 642 if (reg->table == PRES_REGTAB_TEMP) 643 continue; 644 645 const_set.register_index = get_reg_offset(reg->table, reg->offset); 646 const_set.register_count = 1; 647 const_set.table = reg->table; 648 const_set.constant_class = D3DXPC_FORCE_DWORD; 649 const_set.element_count = 1; 650 append_const_set(const_tab, &const_set); 651 } 652 } 653 654 static int compare_const_set(const void *a, const void *b) 655 { 656 const struct d3dx_const_param_eval_output *r1 = a; 657 const struct d3dx_const_param_eval_output *r2 = b; 658 659 if (r1->table != r2->table) 660 return r1->table - r2->table; 661 return r1->register_index - r2->register_index; 662 } 663 664 static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab, 665 struct d3dx_parameter *param, unsigned int index) 666 { 667 unsigned int i, start_index = index; 668 DWORD *current_data; 669 enum pres_reg_tables current_table; 670 unsigned int current_start_offset, element_count; 671 struct d3dx_const_param_eval_output *first_const; 672 673 if (!const_tab->const_set_count) 674 return D3D_OK; 675 676 while (index < const_tab->const_set_count - 1) 677 { 678 first_const = &const_tab->const_set[index]; 679 current_data = first_const->param->data; 680 current_table = first_const->table; 681 current_start_offset = get_offset_reg(current_table, first_const->register_index); 682 element_count = 0; 683 for (i = index; i < const_tab->const_set_count; ++i) 684 { 685 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i]; 686 unsigned int count = get_offset_reg(const_set->table, 687 const_set->register_count * const_set->element_count); 688 unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index); 689 690 if (!(const_set->table == current_table && current_start_offset == start_offset 691 && const_set->direct_copy == first_const->direct_copy 692 && current_data == const_set->param->data 693 && (const_set->direct_copy || (first_const->param->type == const_set->param->type 694 && first_const->param->class == const_set->param->class 695 && first_const->param->columns == const_set->param->columns 696 && first_const->param->rows == const_set->param->rows 697 && first_const->register_count == const_set->register_count 698 && (i == const_tab->const_set_count - 1 699 || first_const->param->element_count == const_set->param->element_count))))) 700 break; 701 702 current_start_offset += count; 703 current_data += const_set->direct_copy ? count : const_set->param->rows 704 * const_set->param->columns * const_set->element_count; 705 element_count += const_set->element_count; 706 } 707 708 if (i > index + 1) 709 { 710 TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index, 711 debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy); 712 713 first_const->element_count = element_count; 714 if (first_const->direct_copy) 715 { 716 first_const->element_count = 1; 717 if (index == start_index 718 && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT)) 719 { 720 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 721 return D3DERR_INVALIDCALL; 722 first_const->param = param; 723 } 724 first_const->register_count = get_reg_offset(current_table, current_start_offset) 725 - first_const->register_index; 726 } 727 memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i], 728 sizeof(*const_tab->const_set) * (const_tab->const_set_count - i)); 729 const_tab->const_set_count -= i - index - 1; 730 } 731 else 732 { 733 TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n", 734 const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy); 735 } 736 index = i; 737 } 738 return D3D_OK; 739 } 740 741 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab, 742 D3DXHANDLE hc, struct d3dx_parameter *param) 743 { 744 D3DXCONSTANT_DESC desc; 745 unsigned int const_count, param_count, i; 746 BOOL get_element; 747 struct d3dx_const_param_eval_output const_set; 748 struct const_upload_info info; 749 enum pres_value_type table_type; 750 HRESULT hr; 751 752 if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL))) 753 return D3DERR_INVALIDCALL; 754 755 if (param->element_count) 756 { 757 param_count = param->element_count; 758 const_count = desc.Elements; 759 get_element = TRUE; 760 } 761 else 762 { 763 if (desc.Elements > 1) 764 { 765 FIXME("Unexpected number of constant elements %u.\n", desc.Elements); 766 return D3DERR_INVALIDCALL; 767 } 768 param_count = param->member_count; 769 const_count = desc.StructMembers; 770 get_element = FALSE; 771 } 772 if (const_count != param_count) 773 { 774 FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n", 775 param_count, const_count); 776 return D3DERR_INVALIDCALL; 777 } 778 if (const_count) 779 { 780 HRESULT ret = D3D_OK; 781 D3DXHANDLE hc_element; 782 unsigned int index = const_tab->const_set_count; 783 784 for (i = 0; i < const_count; ++i) 785 { 786 if (get_element) 787 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i); 788 else 789 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i); 790 if (!hc_element) 791 { 792 FIXME("Could not get constant.\n"); 793 hr = D3DERR_INVALIDCALL; 794 } 795 else 796 { 797 hr = init_set_constants_param(const_tab, ctab, hc_element, ¶m->members[i]); 798 } 799 if (FAILED(hr)) 800 ret = hr; 801 } 802 if (FAILED(ret)) 803 return ret; 804 return merge_const_set_entries(const_tab, param, index); 805 } 806 807 TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n", 808 debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes); 809 TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n", 810 debugstr_a(param->name), param->rows, param->columns, param->class, 811 param->flags, param->bytes); 812 813 const_set.element_count = 1; 814 const_set.param = param; 815 const_set.constant_class = desc.Class; 816 if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table)) 817 { 818 FIXME("Unknown register set %u.\n", desc.RegisterSet); 819 return D3DERR_INVALIDCALL; 820 } 821 const_set.register_index = desc.RegisterIndex; 822 const_set.table = const_tab->regset2table[desc.RegisterSet]; 823 if (const_set.table >= PRES_REGTAB_COUNT) 824 { 825 ERR("Unexpected register set %u.\n", desc.RegisterSet); 826 return D3DERR_INVALIDCALL; 827 } 828 assert(table_info[const_set.table].component_size == sizeof(unsigned int)); 829 assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int)); 830 const_set.register_count = desc.RegisterCount; 831 table_type = table_info[const_set.table].type; 832 get_const_upload_info(&const_set, &info); 833 if (!info.count) 834 { 835 TRACE("%s has zero count, skipping.\n", debugstr_a(param->name)); 836 return D3D_OK; 837 } 838 839 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 840 return D3DERR_INVALIDCALL; 841 842 const_set.direct_copy = table_type_from_param_type(param->type) == table_type 843 && !info.transpose && info.minor == info.major_stride 844 && info.count == get_offset_reg(const_set.table, const_set.register_count) 845 && info.count * sizeof(unsigned int) <= param->bytes; 846 if (info.minor_remainder && !const_set.direct_copy && !info.transpose) 847 FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n", 848 debugstr_a(param->name)); 849 850 if (info.major_count > info.major 851 || (info.major_count == info.major && info.minor_remainder)) 852 { 853 WARN("Constant dimensions exceed parameter size.\n"); 854 return D3DERR_INVALIDCALL; 855 } 856 857 if (FAILED(hr = append_const_set(const_tab, &const_set))) 858 return hr; 859 860 return D3D_OK; 861 } 862 863 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out, 864 struct d3dx9_base_effect *base, const char **skip_constants, 865 unsigned int skip_constants_count, struct d3dx_preshader *pres) 866 { 867 ID3DXConstantTable *ctab; 868 D3DXCONSTANT_DESC *cdesc; 869 struct d3dx_parameter **inputs_param; 870 D3DXCONSTANTTABLE_DESC desc; 871 HRESULT hr; 872 D3DXHANDLE hc; 873 unsigned int i, j; 874 875 hr = D3DXGetShaderConstantTable(byte_code, &ctab); 876 if (FAILED(hr) || !ctab) 877 { 878 TRACE("Could not get CTAB data, hr %#x.\n", hr); 879 /* returning OK, shaders and preshaders without CTAB are valid */ 880 return D3D_OK; 881 } 882 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc))) 883 { 884 FIXME("Could not get CTAB desc, hr %#x.\n", hr); 885 goto cleanup; 886 } 887 888 out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants); 889 out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants); 890 if (!cdesc || !inputs_param) 891 { 892 hr = E_OUTOFMEMORY; 893 goto cleanup; 894 } 895 896 for (i = 0; i < desc.Constants; ++i) 897 { 898 unsigned int index = out->input_count; 899 WORD constantinfo_reserved; 900 901 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i); 902 if (!hc) 903 { 904 FIXME("Null constant handle.\n"); 905 goto cleanup; 906 } 907 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved))) 908 goto cleanup; 909 inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name); 910 if (!inputs_param[index]) 911 { 912 WARN("Could not find parameter %s in effect.\n", cdesc[index].Name); 913 continue; 914 } 915 if (cdesc[index].Class == D3DXPC_OBJECT) 916 { 917 TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]); 918 if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT 919 || !is_param_type_sampler(inputs_param[index]->type)) 920 { 921 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name)); 922 hr = D3DERR_INVALIDCALL; 923 goto cleanup; 924 } 925 if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount) 926 { 927 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name)); 928 hr = D3DERR_INVALIDCALL; 929 goto cleanup; 930 } 931 } 932 if (!is_top_level_parameter(inputs_param[index])) 933 { 934 WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name)); 935 hr = E_FAIL; 936 goto cleanup; 937 } 938 939 for (j = 0; j < skip_constants_count; ++j) 940 { 941 if (!strcmp(cdesc[index].Name, skip_constants[j])) 942 { 943 if (!constantinfo_reserved) 944 { 945 WARN("skip_constants parameter %s is not register bound.\n", 946 cdesc[index].Name); 947 hr = D3DERR_INVALIDCALL; 948 goto cleanup; 949 } 950 TRACE("Skipping constant %s.\n", cdesc[index].Name); 951 break; 952 } 953 } 954 if (j < skip_constants_count) 955 continue; 956 ++out->input_count; 957 if (inputs_param[index]->class == D3DXPC_OBJECT) 958 continue; 959 if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index]))) 960 goto cleanup; 961 } 962 if (pres) 963 append_pres_const_sets_for_shader_input(out, pres); 964 if (out->const_set_count) 965 { 966 struct d3dx_const_param_eval_output *new_alloc; 967 968 qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set); 969 970 i = 0; 971 while (i < out->const_set_count - 1) 972 { 973 if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD 974 && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD 975 && out->const_set[i].table == out->const_set[i + 1].table 976 && out->const_set[i].register_index + out->const_set[i].register_count 977 >= out->const_set[i + 1].register_index) 978 { 979 assert(out->const_set[i].register_index + out->const_set[i].register_count 980 <= out->const_set[i + 1].register_index + 1); 981 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1 982 - out->const_set[i].register_index; 983 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i]) 984 * (out->const_set_count - i - 2)); 985 --out->const_set_count; 986 } 987 else 988 { 989 ++i; 990 } 991 } 992 993 new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set, 994 sizeof(*out->const_set) * out->const_set_count); 995 if (new_alloc) 996 { 997 out->const_set = new_alloc; 998 out->const_set_size = out->const_set_count; 999 } 1000 else 1001 { 1002 WARN("Out of memory.\n"); 1003 } 1004 } 1005 cleanup: 1006 ID3DXConstantTable_Release(ctab); 1007 return hr; 1008 } 1009 1010 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register) 1011 { 1012 if (table < PRES_REGTAB_COUNT) 1013 table_sizes[table] = max(table_sizes[table], max_register + 1); 1014 } 1015 1016 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab) 1017 { 1018 unsigned int i, table, max_register; 1019 1020 for (i = 0; i < ctab->input_count; ++i) 1021 { 1022 if (!ctab->inputs[i].RegisterCount) 1023 continue; 1024 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1; 1025 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1026 update_table_size(table_sizes, table, max_register); 1027 } 1028 } 1029 1030 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count) 1031 { 1032 static const char *xyzw_str = "xyzw"; 1033 unsigned int i, table; 1034 1035 table = arg->reg.table; 1036 if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT) 1037 { 1038 TRACE("("); 1039 for (i = 0; i < component_count; ++i) 1040 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e", 1041 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]); 1042 TRACE(")"); 1043 } 1044 else 1045 { 1046 if (arg->index_reg.table == PRES_REGTAB_COUNT) 1047 { 1048 TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset)); 1049 } 1050 else 1051 { 1052 unsigned int index_reg; 1053 1054 index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset); 1055 TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset), 1056 table_symbol[arg->index_reg.table], index_reg, 1057 xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]); 1058 } 1059 for (i = 0; i < component_count; ++i) 1060 TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]); 1061 } 1062 } 1063 1064 static void dump_registers(struct d3dx_const_tab *ctab) 1065 { 1066 unsigned int table, i; 1067 1068 for (i = 0; i < ctab->input_count; ++i) 1069 { 1070 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1071 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)", 1072 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount); 1073 } 1074 } 1075 1076 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins) 1077 { 1078 unsigned int i; 1079 1080 TRACE("%s ", pres_op_info[ins->op].mnem); 1081 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count); 1082 for (i = 0; i < pres_op_info[ins->op].input_count; ++i) 1083 { 1084 TRACE(", "); 1085 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count); 1086 } 1087 TRACE("\n"); 1088 } 1089 1090 static void dump_preshader(struct d3dx_preshader *pres) 1091 { 1092 unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4; 1093 const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED]; 1094 1095 if (immediate_count) 1096 TRACE("// Immediates:\n"); 1097 for (i = 0; i < immediate_count; ++i) 1098 { 1099 if (!(i % 4)) 1100 TRACE("// "); 1101 TRACE("%.8e", immediates[i]); 1102 if (i % 4 == 3) 1103 TRACE("\n"); 1104 else 1105 TRACE(", "); 1106 } 1107 TRACE("// Preshader registers:\n"); 1108 dump_registers(&pres->inputs); 1109 TRACE("preshader\n"); 1110 for (i = 0; i < pres->ins_count; ++i) 1111 dump_ins(&pres->regs, &pres->ins[i]); 1112 } 1113 1114 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base) 1115 { 1116 unsigned int *p; 1117 unsigned int i, j, const_count; 1118 double *dconst; 1119 HRESULT hr; 1120 unsigned int saved_word; 1121 unsigned int section_size; 1122 1123 TRACE("Preshader version %#x.\n", *ptr & 0xffff); 1124 1125 if (!count) 1126 { 1127 WARN("Unexpected end of byte code buffer.\n"); 1128 return D3DXERR_INVALIDDATA; 1129 } 1130 1131 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, §ion_size); 1132 if (p) 1133 { 1134 const_count = *p++; 1135 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int))) 1136 { 1137 WARN("Byte code buffer ends unexpectedly.\n"); 1138 return D3DXERR_INVALIDDATA; 1139 } 1140 dconst = (double *)p; 1141 } 1142 else 1143 { 1144 const_count = 0; 1145 dconst = NULL; 1146 } 1147 TRACE("%u double constants.\n", const_count); 1148 1149 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, §ion_size); 1150 if (!p) 1151 { 1152 WARN("Could not find preshader code.\n"); 1153 return D3D_OK; 1154 } 1155 pres->ins_count = *p++; 1156 --section_size; 1157 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins)) 1158 { 1159 WARN("Invalid instruction count %u.\n", pres->ins_count); 1160 return D3DXERR_INVALIDDATA; 1161 } 1162 TRACE("%u instructions.\n", pres->ins_count); 1163 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count); 1164 if (!pres->ins) 1165 return E_OUTOFMEMORY; 1166 for (i = 0; i < pres->ins_count; ++i) 1167 { 1168 unsigned int *ptr_next; 1169 1170 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]); 1171 if (!ptr_next) 1172 return D3DXERR_INVALIDDATA; 1173 section_size -= ptr_next - p; 1174 p = ptr_next; 1175 } 1176 1177 pres->inputs.regset2table = pres_regset2table; 1178 1179 saved_word = *ptr; 1180 *ptr = 0xfffe0000; 1181 hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL); 1182 *ptr = saved_word; 1183 if (FAILED(hr)) 1184 return hr; 1185 1186 if (const_count % get_reg_components(PRES_REGTAB_IMMED)) 1187 { 1188 FIXME("const_count %u is not a multiple of %u.\n", const_count, 1189 get_reg_components(PRES_REGTAB_IMMED)); 1190 return D3DXERR_INVALIDDATA; 1191 } 1192 pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count); 1193 1194 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs); 1195 for (i = 0; i < pres->ins_count; ++i) 1196 { 1197 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j) 1198 { 1199 enum pres_reg_tables table; 1200 unsigned int reg_idx; 1201 1202 if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT) 1203 { 1204 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0 1205 : pres->ins[i].component_count - 1; 1206 1207 table = pres->ins[i].inputs[j].reg.table; 1208 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset 1209 + last_component_index); 1210 } 1211 else 1212 { 1213 table = pres->ins[i].inputs[j].index_reg.table; 1214 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset); 1215 } 1216 if (reg_idx >= pres->regs.table_sizes[table]) 1217 { 1218 /* Native accepts these broken preshaders. */ 1219 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n", 1220 i, j, table, reg_idx); 1221 return D3DXERR_INVALIDDATA; 1222 } 1223 } 1224 update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table, 1225 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset)); 1226 } 1227 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED))) 1228 return E_OUTOFMEMORY; 1229 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count); 1230 1231 return D3D_OK; 1232 } 1233 1234 HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size, 1235 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter, 1236 const char **skip_constants, unsigned int skip_constants_count) 1237 { 1238 struct d3dx_param_eval *peval; 1239 unsigned int *ptr, *shader_ptr = NULL; 1240 unsigned int i; 1241 BOOL shader; 1242 unsigned int count, pres_size; 1243 HRESULT ret; 1244 1245 TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n", 1246 base_effect, byte_code, byte_code_size, type, peval_out); 1247 1248 count = byte_code_size / sizeof(unsigned int); 1249 if (!byte_code || !count) 1250 { 1251 *peval_out = NULL; 1252 return D3D_OK; 1253 } 1254 1255 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval)); 1256 if (!peval) 1257 { 1258 ret = E_OUTOFMEMORY; 1259 goto err_out; 1260 } 1261 peval->version_counter = version_counter; 1262 1263 peval->param_type = type; 1264 switch (type) 1265 { 1266 case D3DXPT_VERTEXSHADER: 1267 case D3DXPT_PIXELSHADER: 1268 shader = TRUE; 1269 break; 1270 default: 1271 shader = FALSE; 1272 break; 1273 } 1274 peval->shader_inputs.regset2table = shad_regset2table; 1275 1276 ptr = (unsigned int *)byte_code; 1277 if (shader) 1278 { 1279 if ((*ptr & 0xfffe0000) != 0xfffe0000) 1280 { 1281 FIXME("Invalid shader signature %#x.\n", *ptr); 1282 ret = D3DXERR_INVALIDDATA; 1283 goto err_out; 1284 } 1285 TRACE("Shader version %#x.\n", *ptr & 0xffff); 1286 shader_ptr = ptr; 1287 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size); 1288 if (!ptr) 1289 TRACE("No preshader found.\n"); 1290 } 1291 else 1292 { 1293 pres_size = count; 1294 } 1295 1296 if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect))) 1297 { 1298 FIXME("Failed parsing preshader, byte code for analysis follows.\n"); 1299 dump_bytecode(byte_code, byte_code_size); 1300 goto err_out; 1301 } 1302 1303 if (shader) 1304 { 1305 if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect, 1306 skip_constants, skip_constants_count, &peval->pres))) 1307 { 1308 TRACE("Could not get shader constant table, hr %#x.\n", ret); 1309 goto err_out; 1310 } 1311 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs); 1312 } 1313 1314 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i) 1315 { 1316 if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i))) 1317 goto err_out; 1318 } 1319 1320 if (TRACE_ON(d3dx)) 1321 { 1322 dump_bytecode(byte_code, byte_code_size); 1323 dump_preshader(&peval->pres); 1324 if (shader) 1325 { 1326 TRACE("// Shader registers:\n"); 1327 dump_registers(&peval->shader_inputs); 1328 } 1329 } 1330 *peval_out = peval; 1331 TRACE("Created parameter evaluator %p.\n", *peval_out); 1332 return D3D_OK; 1333 1334 err_out: 1335 WARN("Error creating parameter evaluator.\n"); 1336 if (TRACE_ON(d3dx)) 1337 dump_bytecode(byte_code, byte_code_size); 1338 1339 d3dx_free_param_eval(peval); 1340 *peval_out = NULL; 1341 return ret; 1342 } 1343 1344 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab) 1345 { 1346 HeapFree(GetProcessHeap(), 0, ctab->inputs); 1347 HeapFree(GetProcessHeap(), 0, ctab->inputs_param); 1348 HeapFree(GetProcessHeap(), 0, ctab->const_set); 1349 } 1350 1351 static void d3dx_free_preshader(struct d3dx_preshader *pres) 1352 { 1353 HeapFree(GetProcessHeap(), 0, pres->ins); 1354 1355 regstore_free_tables(&pres->regs); 1356 d3dx_free_const_tab(&pres->inputs); 1357 } 1358 1359 void d3dx_free_param_eval(struct d3dx_param_eval *peval) 1360 { 1361 TRACE("peval %p.\n", peval); 1362 1363 if (!peval) 1364 return; 1365 1366 d3dx_free_preshader(&peval->pres); 1367 d3dx_free_const_tab(&peval->shader_inputs); 1368 HeapFree(GetProcessHeap(), 0, peval); 1369 } 1370 1371 static void pres_int_from_float(void *out, const void *in, unsigned int count) 1372 { 1373 unsigned int i; 1374 const float *in_float = in; 1375 int *out_int = out; 1376 1377 for (i = 0; i < count; ++i) 1378 out_int[i] = in_float[i]; 1379 } 1380 1381 static void pres_bool_from_value(void *out, const void *in, unsigned int count) 1382 { 1383 unsigned int i; 1384 const DWORD *in_dword = in; 1385 BOOL *out_bool = out; 1386 1387 for (i = 0; i < count; ++i) 1388 out_bool[i] = !!in_dword[i]; 1389 } 1390 1391 static void pres_float_from_int(void *out, const void *in, unsigned int count) 1392 { 1393 unsigned int i; 1394 const int *in_int = in; 1395 float *out_float = out; 1396 1397 for (i = 0; i < count; ++i) 1398 out_float[i] = in_int[i]; 1399 } 1400 1401 static void pres_float_from_bool(void *out, const void *in, unsigned int count) 1402 { 1403 unsigned int i; 1404 const BOOL *in_bool = in; 1405 float *out_float = out; 1406 1407 for (i = 0; i < count; ++i) 1408 out_float[i] = !!in_bool[i]; 1409 } 1410 1411 static void pres_int_from_bool(void *out, const void *in, unsigned int count) 1412 { 1413 unsigned int i; 1414 const float *in_bool = in; 1415 int *out_int = out; 1416 1417 for (i = 0; i < count; ++i) 1418 out_int[i] = !!in_bool[i]; 1419 } 1420 1421 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table, 1422 unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type) 1423 { 1424 typedef void (*conv_func)(void *out, const void *in, unsigned int count); 1425 static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] = 1426 { 1427 {NULL, NULL, pres_int_from_float, pres_bool_from_value}, 1428 {NULL, NULL, NULL, NULL}, 1429 {pres_float_from_int, NULL, NULL, pres_bool_from_value}, 1430 {pres_float_from_bool, NULL, pres_int_from_bool, NULL} 1431 }; 1432 enum pres_value_type table_type = table_info[table].type; 1433 1434 if (param_type == table_type) 1435 { 1436 regstore_set_values(rs, table, in, offset, count); 1437 return; 1438 } 1439 1440 set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count); 1441 } 1442 1443 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1444 D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr, 1445 unsigned int start, unsigned int count) 1446 { 1447 if (type == D3DXPT_VERTEXSHADER) 1448 { 1449 switch(table) 1450 { 1451 case PRES_REGTAB_OCONST: 1452 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count); 1453 case PRES_REGTAB_OICONST: 1454 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count); 1455 case PRES_REGTAB_OBCONST: 1456 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count); 1457 default: 1458 FIXME("Unexpected register table %u.\n", table); 1459 return D3DERR_INVALIDCALL; 1460 } 1461 } 1462 else if (type == D3DXPT_PIXELSHADER) 1463 { 1464 switch(table) 1465 { 1466 case PRES_REGTAB_OCONST: 1467 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count); 1468 case PRES_REGTAB_OICONST: 1469 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count); 1470 case PRES_REGTAB_OBCONST: 1471 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count); 1472 default: 1473 FIXME("Unexpected register table %u.\n", table); 1474 return D3DERR_INVALIDCALL; 1475 } 1476 } 1477 else 1478 { 1479 FIXME("Unexpected parameter type %u.\n", type); 1480 return D3DERR_INVALIDCALL; 1481 } 1482 } 1483 1484 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab, 1485 ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1486 D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty) 1487 { 1488 unsigned int const_idx; 1489 unsigned int current_start = 0, current_count = 0; 1490 enum pres_reg_tables current_table = PRES_REGTAB_COUNT; 1491 BOOL update_device = manager || device; 1492 HRESULT hr, result = D3D_OK; 1493 ULONG64 update_version = const_tab->update_version; 1494 1495 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1496 { 1497 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1498 enum pres_reg_tables table = const_set->table; 1499 struct d3dx_parameter *param = const_set->param; 1500 unsigned int element, i, j, start_offset; 1501 struct const_upload_info info; 1502 unsigned int *data; 1503 enum pres_value_type param_type; 1504 1505 if (!(param && is_param_dirty(param, update_version))) 1506 continue; 1507 1508 data = param->data; 1509 start_offset = get_offset_reg(table, const_set->register_index); 1510 if (const_set->direct_copy) 1511 { 1512 regstore_set_values(rs, table, data, start_offset, 1513 get_offset_reg(table, const_set->register_count)); 1514 continue; 1515 } 1516 param_type = table_type_from_param_type(param->type); 1517 if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR) 1518 { 1519 unsigned int count = max(param->rows, param->columns); 1520 1521 if (count >= get_reg_components(table)) 1522 { 1523 regstore_set_data(rs, table, start_offset, data, 1524 count * const_set->element_count, param_type); 1525 } 1526 else 1527 { 1528 for (element = 0; element < const_set->element_count; ++element) 1529 regstore_set_data(rs, table, start_offset + get_offset_reg(table, element), 1530 &data[element * count], count, param_type); 1531 } 1532 continue; 1533 } 1534 get_const_upload_info(const_set, &info); 1535 for (element = 0; element < const_set->element_count; ++element) 1536 { 1537 unsigned int *out = (unsigned int *)rs->tables[table] + start_offset; 1538 1539 /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer. 1540 * All the supported types of parameters and table values have the same size. */ 1541 if (info.transpose) 1542 { 1543 for (i = 0; i < info.major_count; ++i) 1544 for (j = 0; j < info.minor; ++j) 1545 out[i * info.major_stride + j] = data[i + j * info.major]; 1546 1547 for (j = 0; j < info.minor_remainder; ++j) 1548 out[i * info.major_stride + j] = data[i + j * info.major]; 1549 } 1550 else 1551 { 1552 for (i = 0; i < info.major_count; ++i) 1553 for (j = 0; j < info.minor; ++j) 1554 out[i * info.major_stride + j] = data[i * info.minor + j]; 1555 } 1556 start_offset += get_offset_reg(table, const_set->register_count); 1557 data += param->rows * param->columns; 1558 } 1559 start_offset = get_offset_reg(table, const_set->register_index); 1560 if (table_info[table].type != param_type) 1561 regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset, 1562 get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type); 1563 } 1564 const_tab->update_version = new_update_version; 1565 if (!update_device) 1566 return D3D_OK; 1567 1568 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1569 { 1570 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1571 1572 if (device_update_all || (const_set->param 1573 ? is_param_dirty(const_set->param, update_version) : pres_dirty)) 1574 { 1575 enum pres_reg_tables table = const_set->table; 1576 1577 if (table == current_table && current_start + current_count == const_set->register_index) 1578 { 1579 current_count += const_set->register_count * const_set->element_count; 1580 } 1581 else 1582 { 1583 if (current_count) 1584 { 1585 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1586 (DWORD *)rs->tables[current_table] 1587 + get_offset_reg(current_table, current_start), current_start, current_count))) 1588 result = hr; 1589 } 1590 current_table = table; 1591 current_start = const_set->register_index; 1592 current_count = const_set->register_count * const_set->element_count; 1593 } 1594 } 1595 } 1596 if (current_count) 1597 { 1598 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1599 (DWORD *)rs->tables[current_table] 1600 + get_offset_reg(current_table, current_start), current_start, current_count))) 1601 result = hr; 1602 } 1603 return result; 1604 } 1605 1606 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset) 1607 { 1608 return regstore_get_double(rs, table, offset); 1609 } 1610 1611 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp) 1612 { 1613 unsigned int offset, base_index, reg_index, table; 1614 1615 table = opr->reg.table; 1616 1617 if (opr->index_reg.table == PRES_REGTAB_COUNT) 1618 base_index = 0; 1619 else 1620 base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset)); 1621 1622 offset = get_offset_reg(table, base_index) + opr->reg.offset + comp; 1623 reg_index = get_reg_offset(table, offset); 1624 1625 if (reg_index >= rs->table_sizes[table]) 1626 { 1627 unsigned int wrap_size; 1628 1629 if (table == PRES_REGTAB_CONST) 1630 { 1631 /* As it can be guessed from tests, offset into floating constant table is wrapped 1632 * to the nearest power of 2 and not to the actual table size. */ 1633 for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1) 1634 ; 1635 } 1636 else 1637 { 1638 wrap_size = rs->table_sizes[table]; 1639 } 1640 WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n", 1641 reg_index, table, wrap_size, rs->table_sizes[table]); 1642 reg_index %= wrap_size; 1643 1644 if (reg_index >= rs->table_sizes[table]) 1645 return 0.0; 1646 1647 offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table); 1648 } 1649 1650 return exec_get_reg_value(rs, table, offset); 1651 } 1652 1653 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg, 1654 unsigned int comp, double res) 1655 { 1656 regstore_set_double(rs, reg->table, reg->offset + comp, res); 1657 } 1658 1659 #define ARGS_ARRAY_SIZE 8 1660 static HRESULT execute_preshader(struct d3dx_preshader *pres) 1661 { 1662 unsigned int i, j, k; 1663 double args[ARGS_ARRAY_SIZE]; 1664 double res; 1665 1666 for (i = 0; i < pres->ins_count; ++i) 1667 { 1668 const struct d3dx_pres_ins *ins; 1669 const struct op_info *oi; 1670 1671 ins = &pres->ins[i]; 1672 oi = &pres_op_info[ins->op]; 1673 if (oi->func_all_comps) 1674 { 1675 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE) 1676 { 1677 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count); 1678 return E_FAIL; 1679 } 1680 for (k = 0; k < oi->input_count; ++k) 1681 for (j = 0; j < ins->component_count; ++j) 1682 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k], 1683 ins->scalar_op && !k ? 0 : j); 1684 res = oi->func(args, ins->component_count); 1685 1686 /* only 'dot' instruction currently falls here */ 1687 exec_set_arg(&pres->regs, &ins->output.reg, 0, res); 1688 } 1689 else 1690 { 1691 for (j = 0; j < ins->component_count; ++j) 1692 { 1693 for (k = 0; k < oi->input_count; ++k) 1694 args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j); 1695 res = oi->func(args, ins->component_count); 1696 exec_set_arg(&pres->regs, &ins->output.reg, j, res); 1697 } 1698 } 1699 } 1700 return D3D_OK; 1701 } 1702 1703 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version) 1704 { 1705 unsigned int i; 1706 1707 if (update_version == ULONG64_MAX) 1708 update_version = ctab->update_version; 1709 for (i = 0; i < ctab->input_count; ++i) 1710 { 1711 if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]), 1712 update_version)) 1713 return TRUE; 1714 } 1715 return FALSE; 1716 } 1717 1718 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version) 1719 { 1720 return is_const_tab_input_dirty(&peval->pres.inputs, update_version) 1721 || is_const_tab_input_dirty(&peval->shader_inputs, update_version); 1722 } 1723 1724 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param, 1725 void *param_value) 1726 { 1727 HRESULT hr; 1728 unsigned int i; 1729 unsigned int elements, elements_param, elements_table; 1730 float *oc; 1731 1732 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value); 1733 1734 if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX)) 1735 { 1736 set_constants(&peval->pres.regs, &peval->pres.inputs, 1737 next_update_version(peval->version_counter), 1738 NULL, NULL, peval->param_type, FALSE, FALSE); 1739 1740 if (FAILED(hr = execute_preshader(&peval->pres))) 1741 return hr; 1742 } 1743 1744 elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]); 1745 elements_param = param->bytes / sizeof(unsigned int); 1746 elements = min(elements_table, elements_param); 1747 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST]; 1748 for (i = 0; i < elements; ++i) 1749 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT); 1750 return D3D_OK; 1751 } 1752 1753 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1754 struct d3dx_param_eval *peval, BOOL update_all) 1755 { 1756 HRESULT hr; 1757 struct d3dx_preshader *pres = &peval->pres; 1758 struct d3dx_regstore *rs = &pres->regs; 1759 ULONG64 new_update_version = next_update_version(peval->version_counter); 1760 BOOL pres_dirty = FALSE; 1761 1762 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type); 1763 1764 if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX)) 1765 { 1766 set_constants(rs, &pres->inputs, new_update_version, 1767 NULL, NULL, peval->param_type, FALSE, FALSE); 1768 if (FAILED(hr = execute_preshader(pres))) 1769 return hr; 1770 pres_dirty = TRUE; 1771 } 1772 1773 return set_constants(rs, &peval->shader_inputs, new_update_version, 1774 manager, device, peval->param_type, update_all, pres_dirty); 1775 } 1776