1 /* 2 * Copyright 2016 Paul Gofman 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with this library; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 17 */ 18 19 #include "config.h" 20 #include "wine/port.h" 21 22 #include "d3dx9_private.h" 23 24 #include <float.h> 25 #include <assert.h> 26 27 WINE_DEFAULT_DEBUG_CHANNEL(d3dx); 28 29 /* ReactOS FIXME: Insect */ 30 #define fmin min 31 #define fmax max 32 33 enum pres_ops 34 { 35 PRESHADER_OP_NOP, 36 PRESHADER_OP_MOV, 37 PRESHADER_OP_NEG, 38 PRESHADER_OP_RCP, 39 PRESHADER_OP_FRC, 40 PRESHADER_OP_EXP, 41 PRESHADER_OP_LOG, 42 PRESHADER_OP_RSQ, 43 PRESHADER_OP_SIN, 44 PRESHADER_OP_COS, 45 PRESHADER_OP_ASIN, 46 PRESHADER_OP_ACOS, 47 PRESHADER_OP_ATAN, 48 PRESHADER_OP_MIN, 49 PRESHADER_OP_MAX, 50 PRESHADER_OP_LT, 51 PRESHADER_OP_GE, 52 PRESHADER_OP_ADD, 53 PRESHADER_OP_MUL, 54 PRESHADER_OP_ATAN2, 55 PRESHADER_OP_DIV, 56 PRESHADER_OP_CMP, 57 PRESHADER_OP_DOT, 58 PRESHADER_OP_DOTSWIZ6, 59 PRESHADER_OP_DOTSWIZ8, 60 }; 61 62 typedef double (*pres_op_func)(double *args, int n); 63 64 static double to_signed_nan(double v) 65 { 66 static const union 67 { 68 ULONG64 ulong64_value; 69 double double_value; 70 } 71 signed_nan = 72 { 73 0xfff8000000000000 74 }; 75 76 return isnan(v) ? signed_nan.double_value : v; 77 } 78 79 static double pres_mov(double *args, int n) {return args[0];} 80 static double pres_add(double *args, int n) {return args[0] + args[1];} 81 static double pres_mul(double *args, int n) {return args[0] * args[1];} 82 static double pres_dot(double *args, int n) 83 { 84 int i; 85 double sum; 86 87 sum = 0.0; 88 for (i = 0; i < n; ++i) 89 sum += args[i] * args[i + n]; 90 return sum; 91 } 92 93 static double pres_dotswiz6(double *args, int n) 94 { 95 return pres_dot(args, 3); 96 } 97 98 static double pres_dotswiz8(double *args, int n) 99 { 100 return pres_dot(args, 4); 101 } 102 103 static double pres_neg(double *args, int n) {return -args[0];} 104 static double pres_rcp(double *args, int n) {return 1.0 / args[0];} 105 static double pres_lt(double *args, int n) {return args[0] < args[1] ? 1.0 : 0.0;} 106 static double pres_ge(double *args, int n) {return args[0] >= args[1] ? 1.0 : 0.0;} 107 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);} 108 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);} 109 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);} 110 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];} 111 static double pres_sin(double *args, int n) {return sin(args[0]);} 112 static double pres_cos(double *args, int n) {return cos(args[0]);} 113 static double pres_rsq(double *args, int n) 114 { 115 double v; 116 117 v = fabs(args[0]); 118 if (v == 0.0) 119 return INFINITY; 120 else 121 return 1.0 / sqrt(v); 122 } 123 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);} 124 static double pres_log(double *args, int n) 125 { 126 double v; 127 128 v = fabs(args[0]); 129 if (v == 0.0) 130 return 0.0; 131 else 132 #ifdef HAVE_LOG2 133 return log2(v); 134 #else 135 return log(v) / log(2); 136 #endif 137 } 138 static double pres_asin(double *args, int n) {return to_signed_nan(asin(args[0]));} 139 static double pres_acos(double *args, int n) {return to_signed_nan(acos(args[0]));} 140 static double pres_atan(double *args, int n) {return atan(args[0]);} 141 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);} 142 143 /* According to the test results 'div' operation always returns 0. Compiler does not seem to ever 144 * generate it, using rcp + mul instead, so probably it is not implemented in native d3dx. */ 145 static double pres_div(double *args, int n) {return 0.0;} 146 147 #define PRES_OPCODE_MASK 0x7ff00000 148 #define PRES_OPCODE_SHIFT 20 149 #define PRES_SCALAR_FLAG 0x80000000 150 #define PRES_NCOMP_MASK 0x0000ffff 151 152 #define FOURCC_PRES 0x53455250 153 #define FOURCC_CLIT 0x54494c43 154 #define FOURCC_FXLC 0x434c5846 155 #define FOURCC_PRSI 0x49535250 156 #define PRES_SIGN 0x46580000 157 158 struct op_info 159 { 160 unsigned int opcode; 161 char mnem[16]; 162 unsigned int input_count; 163 BOOL func_all_comps; 164 pres_op_func func; 165 }; 166 167 static const struct op_info pres_op_info[] = 168 { 169 {0x000, "nop", 0, 0, NULL }, /* PRESHADER_OP_NOP */ 170 {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */ 171 {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */ 172 {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */ 173 {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */ 174 {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */ 175 {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */ 176 {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */ 177 {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */ 178 {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */ 179 {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */ 180 {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */ 181 {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */ 182 {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */ 183 {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */ 184 {0x202, "lt", 2, 0, pres_lt }, /* PRESHADER_OP_LT */ 185 {0x203, "ge", 2, 0, pres_ge }, /* PRESHADER_OP_GE */ 186 {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */ 187 {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */ 188 {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */ 189 {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */ 190 {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */ 191 {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */ 192 {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */ 193 {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */ 194 }; 195 196 enum pres_value_type 197 { 198 PRES_VT_FLOAT, 199 PRES_VT_DOUBLE, 200 PRES_VT_INT, 201 PRES_VT_BOOL, 202 PRES_VT_COUNT 203 }; 204 205 static const struct 206 { 207 unsigned int component_size; 208 enum pres_value_type type; 209 } 210 table_info[] = 211 { 212 {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */ 213 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */ 214 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */ 215 {sizeof(BOOL), PRES_VT_BOOL }, /* PRES_REGTAB_OBCONST */ 216 {sizeof(int), PRES_VT_INT, }, /* PRES_REGTAB_OICONST */ 217 /* TODO: use double precision for 64 bit */ 218 {sizeof(float), PRES_VT_FLOAT } /* PRES_REGTAB_TEMP */ 219 }; 220 221 static const char *table_symbol[] = 222 { 223 "imm", "c", "oc", "ob", "oi", "r", "(null)", 224 }; 225 226 static const enum pres_reg_tables pres_regset2table[] = 227 { 228 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 229 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 230 PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */ 231 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 232 }; 233 234 static const enum pres_reg_tables shad_regset2table[] = 235 { 236 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 237 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 238 PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */ 239 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 240 }; 241 242 struct d3dx_pres_reg 243 { 244 enum pres_reg_tables table; 245 /* offset is component index, not register index, e. g. 246 offset for component c3.y is 13 (3 * 4 + 1) */ 247 unsigned int offset; 248 }; 249 250 struct d3dx_pres_operand 251 { 252 struct d3dx_pres_reg reg; 253 struct d3dx_pres_reg index_reg; 254 }; 255 256 #define MAX_INPUTS_COUNT 8 257 258 struct d3dx_pres_ins 259 { 260 enum pres_ops op; 261 /* first input argument is scalar, 262 scalar component is propagated */ 263 BOOL scalar_op; 264 unsigned int component_count; 265 struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT]; 266 struct d3dx_pres_operand output; 267 }; 268 269 struct const_upload_info 270 { 271 BOOL transpose; 272 unsigned int major, minor; 273 unsigned int major_stride; 274 unsigned int major_count; 275 unsigned int count; 276 unsigned int minor_remainder; 277 }; 278 279 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type) 280 { 281 switch (type) 282 { 283 case D3DXPT_FLOAT: 284 return PRES_VT_FLOAT; 285 case D3DXPT_INT: 286 return PRES_VT_INT; 287 case D3DXPT_BOOL: 288 return PRES_VT_BOOL; 289 default: 290 FIXME("Unsupported type %u.\n", type); 291 return PRES_VT_COUNT; 292 } 293 } 294 295 static unsigned int get_reg_offset(unsigned int table, unsigned int offset) 296 { 297 return table == PRES_REGTAB_OBCONST ? offset : offset >> 2; 298 } 299 300 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx) 301 { 302 return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2; 303 } 304 305 static unsigned int get_reg_components(unsigned int table) 306 { 307 return get_offset_reg(table, 1); 308 } 309 310 #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8) 311 312 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table) 313 { 314 unsigned int size; 315 316 size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size; 317 if (size) 318 { 319 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size); 320 if (!rs->tables[table]) 321 return E_OUTOFMEMORY; 322 } 323 return D3D_OK; 324 } 325 326 static void regstore_free_tables(struct d3dx_regstore *rs) 327 { 328 unsigned int i; 329 330 for (i = 0; i < PRES_REGTAB_COUNT; ++i) 331 { 332 HeapFree(GetProcessHeap(), 0, rs->tables[i]); 333 } 334 } 335 336 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data, 337 unsigned int start_offset, unsigned int count) 338 { 339 BYTE *dst = rs->tables[table]; 340 const BYTE *src = data; 341 unsigned int size; 342 343 dst += start_offset * table_info[table].component_size; 344 size = count * table_info[table].component_size; 345 assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst)); 346 memcpy(dst, src, size); 347 } 348 349 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset) 350 { 351 BYTE *p; 352 353 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 354 switch (table_info[table].type) 355 { 356 case PRES_VT_FLOAT: 357 return *(float *)p; 358 case PRES_VT_DOUBLE: 359 return *(double *)p; 360 default: 361 FIXME("Unexpected preshader input from table %u.\n", table); 362 return NAN; 363 } 364 } 365 366 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v) 367 { 368 BYTE *p; 369 370 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 371 switch (table_info[table].type) 372 { 373 case PRES_VT_FLOAT : *(float *)p = v; break; 374 case PRES_VT_DOUBLE: *(double *)p = v; break; 375 case PRES_VT_INT : *(int *)p = lrint(v); break; 376 case PRES_VT_BOOL : *(BOOL *)p = !!v; break; 377 default: 378 FIXME("Bad type %u.\n", table_info[table].type); 379 break; 380 } 381 } 382 383 static void dump_bytecode(void *data, unsigned int size) 384 { 385 unsigned int *bytecode = (unsigned int *)data; 386 unsigned int i, j, n; 387 388 size /= sizeof(*bytecode); 389 i = 0; 390 while (i < size) 391 { 392 n = min(size - i, 8); 393 for (j = 0; j < n; ++j) 394 TRACE("0x%08x,", bytecode[i + j]); 395 i += n; 396 TRACE("\n"); 397 } 398 } 399 400 static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count, 401 unsigned int fourcc, unsigned int *size) 402 { 403 /* Provide at least one value in comment section on non-NULL return. */ 404 while (count > 2 && (*ptr & 0xffff) == 0xfffe) 405 { 406 unsigned int section_size; 407 408 section_size = (*ptr >> 16); 409 if (!section_size || section_size + 1 > count) 410 break; 411 if (*(ptr + 1) == fourcc) 412 { 413 *size = section_size; 414 return ptr + 2; 415 } 416 count -= section_size + 1; 417 ptr += section_size + 1; 418 } 419 return NULL; 420 } 421 422 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg) 423 { 424 static const enum pres_reg_tables reg_table[8] = 425 { 426 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT, 427 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP 428 }; 429 430 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT) 431 { 432 FIXME("Unsupported register table %#x.\n", *ptr); 433 return NULL; 434 } 435 436 reg->table = reg_table[*ptr++]; 437 reg->offset = *ptr++; 438 return ptr; 439 } 440 441 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr) 442 { 443 if (count < 3 || (*ptr && count < 5)) 444 { 445 WARN("Byte code buffer ends unexpectedly, count %u.\n", count); 446 return NULL; 447 } 448 449 if (*ptr) 450 { 451 if (*ptr != 1) 452 { 453 FIXME("Unknown relative addressing flag, word %#x.\n", *ptr); 454 return NULL; 455 } 456 ptr = parse_pres_reg(ptr + 1, &opr->index_reg); 457 if (!ptr) 458 return NULL; 459 } 460 else 461 { 462 opr->index_reg.table = PRES_REGTAB_COUNT; 463 ++ptr; 464 } 465 466 ptr = parse_pres_reg(ptr, &opr->reg); 467 468 if (opr->reg.table == PRES_REGTAB_OBCONST) 469 opr->reg.offset /= 4; 470 return ptr; 471 } 472 473 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins) 474 { 475 unsigned int ins_code, ins_raw; 476 unsigned int input_count; 477 unsigned int i; 478 479 if (count < 2) 480 { 481 WARN("Byte code buffer ends unexpectedly.\n"); 482 return NULL; 483 } 484 485 ins_raw = *ptr++; 486 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT; 487 ins->component_count = ins_raw & PRES_NCOMP_MASK; 488 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG); 489 490 if (ins->component_count < 1 || ins->component_count > 4) 491 { 492 FIXME("Unsupported number of components %u.\n", ins->component_count); 493 return NULL; 494 } 495 input_count = *ptr++; 496 count -= 2; 497 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i) 498 if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count) 499 break; 500 if (i == ARRAY_SIZE(pres_op_info)) 501 { 502 FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw); 503 return NULL; 504 } 505 ins->op = i; 506 if (input_count > ARRAY_SIZE(ins->inputs)) 507 { 508 FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count, 509 pres_op_info[i].mnem); 510 return NULL; 511 } 512 for (i = 0; i < input_count; ++i) 513 { 514 unsigned int *p; 515 516 p = parse_pres_arg(ptr, count, &ins->inputs[i]); 517 if (!p) 518 return NULL; 519 count -= p - ptr; 520 ptr = p; 521 } 522 ptr = parse_pres_arg(ptr, count, &ins->output); 523 if (ins->output.index_reg.table != PRES_REGTAB_COUNT) 524 { 525 FIXME("Relative addressing in output register not supported.\n"); 526 return NULL; 527 } 528 if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset 529 + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1)) 530 != get_reg_offset(ins->output.reg.table, ins->output.reg.offset)) 531 { 532 FIXME("Instructions outputting multiple registers are not supported.\n"); 533 return NULL; 534 } 535 return ptr; 536 } 537 538 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc, 539 WORD *constantinfo_reserved) 540 { 541 const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc); 542 543 if (!constant) 544 { 545 FIXME("Could not get constant desc.\n"); 546 if (constantinfo_reserved) 547 *constantinfo_reserved = 0; 548 return D3DERR_INVALIDCALL; 549 } 550 *desc = constant->desc; 551 if (constantinfo_reserved) 552 *constantinfo_reserved = constant->constantinfo_reserved; 553 return D3D_OK; 554 } 555 556 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set, 557 struct const_upload_info *info) 558 { 559 struct d3dx_parameter *param = const_set->param; 560 unsigned int table = const_set->table; 561 562 info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS) 563 || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS); 564 if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS) 565 { 566 info->major = param->columns; 567 info->minor = param->rows; 568 } 569 else 570 { 571 info->major = param->rows; 572 info->minor = param->columns; 573 } 574 575 if (get_reg_components(table) == 1) 576 { 577 unsigned int const_length = get_offset_reg(table, const_set->register_count); 578 579 info->major_stride = info->minor; 580 info->major_count = const_length / info->major_stride; 581 info->minor_remainder = const_length % info->major_stride; 582 } 583 else 584 { 585 info->major_stride = get_reg_components(table); 586 info->major_count = const_set->register_count; 587 info->minor_remainder = 0; 588 } 589 info->count = info->major_count * info->minor + info->minor_remainder; 590 } 591 592 #define INITIAL_CONST_SET_SIZE 16 593 594 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set) 595 { 596 if (const_tab->const_set_count >= const_tab->const_set_size) 597 { 598 unsigned int new_size; 599 struct d3dx_const_param_eval_output *new_alloc; 600 601 if (!const_tab->const_set_size) 602 { 603 new_size = INITIAL_CONST_SET_SIZE; 604 new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size); 605 if (!new_alloc) 606 { 607 ERR("Out of memory.\n"); 608 return E_OUTOFMEMORY; 609 } 610 } 611 else 612 { 613 new_size = const_tab->const_set_size * 2; 614 new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set, 615 sizeof(*const_tab->const_set) * new_size); 616 if (!new_alloc) 617 { 618 ERR("Out of memory.\n"); 619 return E_OUTOFMEMORY; 620 } 621 } 622 const_tab->const_set = new_alloc; 623 const_tab->const_set_size = new_size; 624 } 625 const_tab->const_set[const_tab->const_set_count++] = *set; 626 return D3D_OK; 627 } 628 629 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab, 630 struct d3dx_preshader *pres) 631 { 632 unsigned int i; 633 struct d3dx_const_param_eval_output const_set = {NULL}; 634 635 for (i = 0; i < pres->ins_count; ++i) 636 { 637 const struct d3dx_pres_ins *ins = &pres->ins[i]; 638 const struct d3dx_pres_reg *reg = &ins->output.reg; 639 640 if (reg->table == PRES_REGTAB_TEMP) 641 continue; 642 643 const_set.register_index = get_reg_offset(reg->table, reg->offset); 644 const_set.register_count = 1; 645 const_set.table = reg->table; 646 const_set.constant_class = D3DXPC_FORCE_DWORD; 647 const_set.element_count = 1; 648 append_const_set(const_tab, &const_set); 649 } 650 } 651 652 static int compare_const_set(const void *a, const void *b) 653 { 654 const struct d3dx_const_param_eval_output *r1 = a; 655 const struct d3dx_const_param_eval_output *r2 = b; 656 657 if (r1->table != r2->table) 658 return r1->table - r2->table; 659 return r1->register_index - r2->register_index; 660 } 661 662 static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab, 663 struct d3dx_parameter *param, unsigned int index) 664 { 665 unsigned int i, start_index = index; 666 DWORD *current_data; 667 enum pres_reg_tables current_table; 668 unsigned int current_start_offset, element_count; 669 struct d3dx_const_param_eval_output *first_const; 670 671 if (!const_tab->const_set_count) 672 return D3D_OK; 673 674 while (index < const_tab->const_set_count - 1) 675 { 676 first_const = &const_tab->const_set[index]; 677 current_data = first_const->param->data; 678 current_table = first_const->table; 679 current_start_offset = get_offset_reg(current_table, first_const->register_index); 680 element_count = 0; 681 for (i = index; i < const_tab->const_set_count; ++i) 682 { 683 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i]; 684 unsigned int count = get_offset_reg(const_set->table, 685 const_set->register_count * const_set->element_count); 686 unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index); 687 688 if (!(const_set->table == current_table && current_start_offset == start_offset 689 && const_set->direct_copy == first_const->direct_copy 690 && current_data == const_set->param->data 691 && (const_set->direct_copy || (first_const->param->type == const_set->param->type 692 && first_const->param->class == const_set->param->class 693 && first_const->param->columns == const_set->param->columns 694 && first_const->param->rows == const_set->param->rows 695 && first_const->register_count == const_set->register_count 696 && (i == const_tab->const_set_count - 1 697 || first_const->param->element_count == const_set->param->element_count))))) 698 break; 699 700 current_start_offset += count; 701 current_data += const_set->direct_copy ? count : const_set->param->rows 702 * const_set->param->columns * const_set->element_count; 703 element_count += const_set->element_count; 704 } 705 706 if (i > index + 1) 707 { 708 TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index, 709 debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy); 710 711 first_const->element_count = element_count; 712 if (first_const->direct_copy) 713 { 714 first_const->element_count = 1; 715 if (index == start_index 716 && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT)) 717 { 718 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 719 return D3DERR_INVALIDCALL; 720 first_const->param = param; 721 } 722 first_const->register_count = get_reg_offset(current_table, current_start_offset) 723 - first_const->register_index; 724 } 725 memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i], 726 sizeof(*const_tab->const_set) * (const_tab->const_set_count - i)); 727 const_tab->const_set_count -= i - index - 1; 728 } 729 else 730 { 731 TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n", 732 const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy); 733 } 734 index = i; 735 } 736 return D3D_OK; 737 } 738 739 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab, 740 D3DXHANDLE hc, struct d3dx_parameter *param) 741 { 742 D3DXCONSTANT_DESC desc; 743 unsigned int const_count, param_count, i; 744 BOOL get_element; 745 struct d3dx_const_param_eval_output const_set; 746 struct const_upload_info info; 747 enum pres_value_type table_type; 748 HRESULT hr; 749 750 if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL))) 751 return D3DERR_INVALIDCALL; 752 753 if (param->element_count) 754 { 755 param_count = param->element_count; 756 const_count = desc.Elements; 757 get_element = TRUE; 758 } 759 else 760 { 761 if (desc.Elements > 1) 762 { 763 FIXME("Unexpected number of constant elements %u.\n", desc.Elements); 764 return D3DERR_INVALIDCALL; 765 } 766 param_count = param->member_count; 767 const_count = desc.StructMembers; 768 get_element = FALSE; 769 } 770 if (const_count != param_count) 771 { 772 FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n", 773 param_count, const_count); 774 return D3DERR_INVALIDCALL; 775 } 776 if (const_count) 777 { 778 HRESULT ret = D3D_OK; 779 D3DXHANDLE hc_element; 780 unsigned int index = const_tab->const_set_count; 781 782 for (i = 0; i < const_count; ++i) 783 { 784 if (get_element) 785 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i); 786 else 787 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i); 788 if (!hc_element) 789 { 790 FIXME("Could not get constant.\n"); 791 hr = D3DERR_INVALIDCALL; 792 } 793 else 794 { 795 hr = init_set_constants_param(const_tab, ctab, hc_element, ¶m->members[i]); 796 } 797 if (FAILED(hr)) 798 ret = hr; 799 } 800 if (FAILED(ret)) 801 return ret; 802 return merge_const_set_entries(const_tab, param, index); 803 } 804 805 TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n", 806 debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes); 807 TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n", 808 debugstr_a(param->name), param->rows, param->columns, param->class, 809 param->flags, param->bytes); 810 811 const_set.element_count = 1; 812 const_set.param = param; 813 const_set.constant_class = desc.Class; 814 if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table)) 815 { 816 FIXME("Unknown register set %u.\n", desc.RegisterSet); 817 return D3DERR_INVALIDCALL; 818 } 819 const_set.register_index = desc.RegisterIndex; 820 const_set.table = const_tab->regset2table[desc.RegisterSet]; 821 if (const_set.table >= PRES_REGTAB_COUNT) 822 { 823 ERR("Unexpected register set %u.\n", desc.RegisterSet); 824 return D3DERR_INVALIDCALL; 825 } 826 assert(table_info[const_set.table].component_size == sizeof(unsigned int)); 827 assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int)); 828 const_set.register_count = desc.RegisterCount; 829 table_type = table_info[const_set.table].type; 830 get_const_upload_info(&const_set, &info); 831 if (!info.count) 832 { 833 TRACE("%s has zero count, skipping.\n", debugstr_a(param->name)); 834 return D3D_OK; 835 } 836 837 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 838 return D3DERR_INVALIDCALL; 839 840 const_set.direct_copy = table_type_from_param_type(param->type) == table_type 841 && !info.transpose && info.minor == info.major_stride 842 && info.count == get_offset_reg(const_set.table, const_set.register_count) 843 && info.count * sizeof(unsigned int) <= param->bytes; 844 if (info.minor_remainder && !const_set.direct_copy && !info.transpose) 845 FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n", 846 debugstr_a(param->name)); 847 848 if (info.major_count > info.major 849 || (info.major_count == info.major && info.minor_remainder)) 850 { 851 WARN("Constant dimensions exceed parameter size.\n"); 852 return D3DERR_INVALIDCALL; 853 } 854 855 if (FAILED(hr = append_const_set(const_tab, &const_set))) 856 return hr; 857 858 return D3D_OK; 859 } 860 861 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out, 862 struct d3dx9_base_effect *base, const char **skip_constants, 863 unsigned int skip_constants_count, struct d3dx_preshader *pres) 864 { 865 ID3DXConstantTable *ctab; 866 D3DXCONSTANT_DESC *cdesc; 867 struct d3dx_parameter **inputs_param; 868 D3DXCONSTANTTABLE_DESC desc; 869 HRESULT hr; 870 D3DXHANDLE hc; 871 unsigned int i, j; 872 873 hr = D3DXGetShaderConstantTable(byte_code, &ctab); 874 if (FAILED(hr) || !ctab) 875 { 876 TRACE("Could not get CTAB data, hr %#x.\n", hr); 877 /* returning OK, shaders and preshaders without CTAB are valid */ 878 return D3D_OK; 879 } 880 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc))) 881 { 882 FIXME("Could not get CTAB desc, hr %#x.\n", hr); 883 goto cleanup; 884 } 885 886 out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants); 887 out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants); 888 if (!cdesc || !inputs_param) 889 { 890 hr = E_OUTOFMEMORY; 891 goto cleanup; 892 } 893 894 for (i = 0; i < desc.Constants; ++i) 895 { 896 unsigned int index = out->input_count; 897 WORD constantinfo_reserved; 898 899 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i); 900 if (!hc) 901 { 902 FIXME("Null constant handle.\n"); 903 goto cleanup; 904 } 905 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved))) 906 goto cleanup; 907 inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name); 908 if (!inputs_param[index]) 909 { 910 WARN("Could not find parameter %s in effect.\n", cdesc[index].Name); 911 continue; 912 } 913 if (cdesc[index].Class == D3DXPC_OBJECT) 914 { 915 TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]); 916 if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT 917 || !is_param_type_sampler(inputs_param[index]->type)) 918 { 919 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name)); 920 hr = D3DERR_INVALIDCALL; 921 goto cleanup; 922 } 923 if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount) 924 { 925 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name)); 926 hr = D3DERR_INVALIDCALL; 927 goto cleanup; 928 } 929 } 930 if (!is_top_level_parameter(inputs_param[index])) 931 { 932 WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name)); 933 hr = E_FAIL; 934 goto cleanup; 935 } 936 937 for (j = 0; j < skip_constants_count; ++j) 938 { 939 if (!strcmp(cdesc[index].Name, skip_constants[j])) 940 { 941 if (!constantinfo_reserved) 942 { 943 WARN("skip_constants parameter %s is not register bound.\n", 944 cdesc[index].Name); 945 hr = D3DERR_INVALIDCALL; 946 goto cleanup; 947 } 948 TRACE("Skipping constant %s.\n", cdesc[index].Name); 949 break; 950 } 951 } 952 if (j < skip_constants_count) 953 continue; 954 ++out->input_count; 955 if (inputs_param[index]->class == D3DXPC_OBJECT) 956 continue; 957 if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index]))) 958 goto cleanup; 959 } 960 if (pres) 961 append_pres_const_sets_for_shader_input(out, pres); 962 if (out->const_set_count) 963 { 964 struct d3dx_const_param_eval_output *new_alloc; 965 966 qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set); 967 968 i = 0; 969 while (i < out->const_set_count - 1) 970 { 971 if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD 972 && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD 973 && out->const_set[i].table == out->const_set[i + 1].table 974 && out->const_set[i].register_index + out->const_set[i].register_count 975 >= out->const_set[i + 1].register_index) 976 { 977 assert(out->const_set[i].register_index + out->const_set[i].register_count 978 <= out->const_set[i + 1].register_index + 1); 979 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1 980 - out->const_set[i].register_index; 981 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i]) 982 * (out->const_set_count - i - 2)); 983 --out->const_set_count; 984 } 985 else 986 { 987 ++i; 988 } 989 } 990 991 new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set, 992 sizeof(*out->const_set) * out->const_set_count); 993 if (new_alloc) 994 { 995 out->const_set = new_alloc; 996 out->const_set_size = out->const_set_count; 997 } 998 else 999 { 1000 WARN("Out of memory.\n"); 1001 } 1002 } 1003 cleanup: 1004 ID3DXConstantTable_Release(ctab); 1005 return hr; 1006 } 1007 1008 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register) 1009 { 1010 if (table < PRES_REGTAB_COUNT) 1011 table_sizes[table] = max(table_sizes[table], max_register + 1); 1012 } 1013 1014 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab) 1015 { 1016 unsigned int i, table, max_register; 1017 1018 for (i = 0; i < ctab->input_count; ++i) 1019 { 1020 if (!ctab->inputs[i].RegisterCount) 1021 continue; 1022 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1; 1023 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1024 update_table_size(table_sizes, table, max_register); 1025 } 1026 } 1027 1028 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count) 1029 { 1030 static const char *xyzw_str = "xyzw"; 1031 unsigned int i, table; 1032 1033 table = arg->reg.table; 1034 if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT) 1035 { 1036 TRACE("("); 1037 for (i = 0; i < component_count; ++i) 1038 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e", 1039 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]); 1040 TRACE(")"); 1041 } 1042 else 1043 { 1044 if (arg->index_reg.table == PRES_REGTAB_COUNT) 1045 { 1046 TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset)); 1047 } 1048 else 1049 { 1050 unsigned int index_reg; 1051 1052 index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset); 1053 TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset), 1054 table_symbol[arg->index_reg.table], index_reg, 1055 xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]); 1056 } 1057 for (i = 0; i < component_count; ++i) 1058 TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]); 1059 } 1060 } 1061 1062 static void dump_registers(struct d3dx_const_tab *ctab) 1063 { 1064 unsigned int table, i; 1065 1066 for (i = 0; i < ctab->input_count; ++i) 1067 { 1068 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1069 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)", 1070 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount); 1071 } 1072 } 1073 1074 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins) 1075 { 1076 unsigned int i; 1077 1078 TRACE("%s ", pres_op_info[ins->op].mnem); 1079 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count); 1080 for (i = 0; i < pres_op_info[ins->op].input_count; ++i) 1081 { 1082 TRACE(", "); 1083 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count); 1084 } 1085 TRACE("\n"); 1086 } 1087 1088 static void dump_preshader(struct d3dx_preshader *pres) 1089 { 1090 unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4; 1091 const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED]; 1092 1093 if (immediate_count) 1094 TRACE("// Immediates:\n"); 1095 for (i = 0; i < immediate_count; ++i) 1096 { 1097 if (!(i % 4)) 1098 TRACE("// "); 1099 TRACE("%.8e", immediates[i]); 1100 if (i % 4 == 3) 1101 TRACE("\n"); 1102 else 1103 TRACE(", "); 1104 } 1105 TRACE("// Preshader registers:\n"); 1106 dump_registers(&pres->inputs); 1107 TRACE("preshader\n"); 1108 for (i = 0; i < pres->ins_count; ++i) 1109 dump_ins(&pres->regs, &pres->ins[i]); 1110 } 1111 1112 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base) 1113 { 1114 unsigned int *p; 1115 unsigned int i, j, const_count; 1116 double *dconst; 1117 HRESULT hr; 1118 unsigned int saved_word; 1119 unsigned int section_size; 1120 1121 TRACE("Preshader version %#x.\n", *ptr & 0xffff); 1122 1123 if (!count) 1124 { 1125 WARN("Unexpected end of byte code buffer.\n"); 1126 return D3DXERR_INVALIDDATA; 1127 } 1128 1129 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, §ion_size); 1130 if (p) 1131 { 1132 const_count = *p++; 1133 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int))) 1134 { 1135 WARN("Byte code buffer ends unexpectedly.\n"); 1136 return D3DXERR_INVALIDDATA; 1137 } 1138 dconst = (double *)p; 1139 } 1140 else 1141 { 1142 const_count = 0; 1143 dconst = NULL; 1144 } 1145 TRACE("%u double constants.\n", const_count); 1146 1147 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, §ion_size); 1148 if (!p) 1149 { 1150 WARN("Could not find preshader code.\n"); 1151 return D3D_OK; 1152 } 1153 pres->ins_count = *p++; 1154 --section_size; 1155 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins)) 1156 { 1157 WARN("Invalid instruction count %u.\n", pres->ins_count); 1158 return D3DXERR_INVALIDDATA; 1159 } 1160 TRACE("%u instructions.\n", pres->ins_count); 1161 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count); 1162 if (!pres->ins) 1163 return E_OUTOFMEMORY; 1164 for (i = 0; i < pres->ins_count; ++i) 1165 { 1166 unsigned int *ptr_next; 1167 1168 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]); 1169 if (!ptr_next) 1170 return D3DXERR_INVALIDDATA; 1171 section_size -= ptr_next - p; 1172 p = ptr_next; 1173 } 1174 1175 pres->inputs.regset2table = pres_regset2table; 1176 1177 saved_word = *ptr; 1178 *ptr = 0xfffe0000; 1179 hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL); 1180 *ptr = saved_word; 1181 if (FAILED(hr)) 1182 return hr; 1183 1184 if (const_count % get_reg_components(PRES_REGTAB_IMMED)) 1185 { 1186 FIXME("const_count %u is not a multiple of %u.\n", const_count, 1187 get_reg_components(PRES_REGTAB_IMMED)); 1188 return D3DXERR_INVALIDDATA; 1189 } 1190 pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count); 1191 1192 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs); 1193 for (i = 0; i < pres->ins_count; ++i) 1194 { 1195 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j) 1196 { 1197 enum pres_reg_tables table; 1198 unsigned int reg_idx; 1199 1200 if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT) 1201 { 1202 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0 1203 : pres->ins[i].component_count - 1; 1204 1205 table = pres->ins[i].inputs[j].reg.table; 1206 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset 1207 + last_component_index); 1208 } 1209 else 1210 { 1211 table = pres->ins[i].inputs[j].index_reg.table; 1212 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset); 1213 } 1214 if (reg_idx >= pres->regs.table_sizes[table]) 1215 { 1216 /* Native accepts these broken preshaders. */ 1217 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u, preshader parsing failed.\n", 1218 i, j, table, reg_idx); 1219 return D3DXERR_INVALIDDATA; 1220 } 1221 } 1222 update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table, 1223 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset)); 1224 } 1225 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED))) 1226 return E_OUTOFMEMORY; 1227 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count); 1228 1229 return D3D_OK; 1230 } 1231 1232 HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size, 1233 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter, 1234 const char **skip_constants, unsigned int skip_constants_count) 1235 { 1236 struct d3dx_param_eval *peval; 1237 unsigned int *ptr, *shader_ptr = NULL; 1238 unsigned int i; 1239 BOOL shader; 1240 unsigned int count, pres_size; 1241 HRESULT ret; 1242 1243 TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n", 1244 base_effect, byte_code, byte_code_size, type, peval_out); 1245 1246 count = byte_code_size / sizeof(unsigned int); 1247 if (!byte_code || !count) 1248 { 1249 *peval_out = NULL; 1250 return D3D_OK; 1251 } 1252 1253 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval)); 1254 if (!peval) 1255 { 1256 ret = E_OUTOFMEMORY; 1257 goto err_out; 1258 } 1259 peval->version_counter = version_counter; 1260 1261 peval->param_type = type; 1262 switch (type) 1263 { 1264 case D3DXPT_VERTEXSHADER: 1265 case D3DXPT_PIXELSHADER: 1266 shader = TRUE; 1267 break; 1268 default: 1269 shader = FALSE; 1270 break; 1271 } 1272 peval->shader_inputs.regset2table = shad_regset2table; 1273 1274 ptr = (unsigned int *)byte_code; 1275 if (shader) 1276 { 1277 if ((*ptr & 0xfffe0000) != 0xfffe0000) 1278 { 1279 FIXME("Invalid shader signature %#x.\n", *ptr); 1280 ret = D3DXERR_INVALIDDATA; 1281 goto err_out; 1282 } 1283 TRACE("Shader version %#x.\n", *ptr & 0xffff); 1284 shader_ptr = ptr; 1285 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size); 1286 if (!ptr) 1287 TRACE("No preshader found.\n"); 1288 } 1289 else 1290 { 1291 pres_size = count; 1292 } 1293 1294 if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect))) 1295 { 1296 FIXME("Failed parsing preshader, byte code for analysis follows.\n"); 1297 dump_bytecode(byte_code, byte_code_size); 1298 goto err_out; 1299 } 1300 1301 if (shader) 1302 { 1303 if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect, 1304 skip_constants, skip_constants_count, &peval->pres))) 1305 { 1306 TRACE("Could not get shader constant table, hr %#x.\n", ret); 1307 goto err_out; 1308 } 1309 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs); 1310 } 1311 1312 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i) 1313 { 1314 if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i))) 1315 goto err_out; 1316 } 1317 1318 if (TRACE_ON(d3dx)) 1319 { 1320 dump_bytecode(byte_code, byte_code_size); 1321 dump_preshader(&peval->pres); 1322 if (shader) 1323 { 1324 TRACE("// Shader registers:\n"); 1325 dump_registers(&peval->shader_inputs); 1326 } 1327 } 1328 *peval_out = peval; 1329 TRACE("Created parameter evaluator %p.\n", *peval_out); 1330 return D3D_OK; 1331 1332 err_out: 1333 WARN("Error creating parameter evaluator.\n"); 1334 if (TRACE_ON(d3dx)) 1335 dump_bytecode(byte_code, byte_code_size); 1336 1337 d3dx_free_param_eval(peval); 1338 *peval_out = NULL; 1339 return ret; 1340 } 1341 1342 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab) 1343 { 1344 HeapFree(GetProcessHeap(), 0, ctab->inputs); 1345 HeapFree(GetProcessHeap(), 0, ctab->inputs_param); 1346 HeapFree(GetProcessHeap(), 0, ctab->const_set); 1347 } 1348 1349 static void d3dx_free_preshader(struct d3dx_preshader *pres) 1350 { 1351 HeapFree(GetProcessHeap(), 0, pres->ins); 1352 1353 regstore_free_tables(&pres->regs); 1354 d3dx_free_const_tab(&pres->inputs); 1355 } 1356 1357 void d3dx_free_param_eval(struct d3dx_param_eval *peval) 1358 { 1359 TRACE("peval %p.\n", peval); 1360 1361 if (!peval) 1362 return; 1363 1364 d3dx_free_preshader(&peval->pres); 1365 d3dx_free_const_tab(&peval->shader_inputs); 1366 HeapFree(GetProcessHeap(), 0, peval); 1367 } 1368 1369 static void pres_int_from_float(void *out, const void *in, unsigned int count) 1370 { 1371 unsigned int i; 1372 const float *in_float = in; 1373 int *out_int = out; 1374 1375 for (i = 0; i < count; ++i) 1376 out_int[i] = in_float[i]; 1377 } 1378 1379 static void pres_bool_from_value(void *out, const void *in, unsigned int count) 1380 { 1381 unsigned int i; 1382 const DWORD *in_dword = in; 1383 BOOL *out_bool = out; 1384 1385 for (i = 0; i < count; ++i) 1386 out_bool[i] = !!in_dword[i]; 1387 } 1388 1389 static void pres_float_from_int(void *out, const void *in, unsigned int count) 1390 { 1391 unsigned int i; 1392 const int *in_int = in; 1393 float *out_float = out; 1394 1395 for (i = 0; i < count; ++i) 1396 out_float[i] = in_int[i]; 1397 } 1398 1399 static void pres_float_from_bool(void *out, const void *in, unsigned int count) 1400 { 1401 unsigned int i; 1402 const BOOL *in_bool = in; 1403 float *out_float = out; 1404 1405 for (i = 0; i < count; ++i) 1406 out_float[i] = !!in_bool[i]; 1407 } 1408 1409 static void pres_int_from_bool(void *out, const void *in, unsigned int count) 1410 { 1411 unsigned int i; 1412 const float *in_bool = in; 1413 int *out_int = out; 1414 1415 for (i = 0; i < count; ++i) 1416 out_int[i] = !!in_bool[i]; 1417 } 1418 1419 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table, 1420 unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type) 1421 { 1422 typedef void (*conv_func)(void *out, const void *in, unsigned int count); 1423 static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] = 1424 { 1425 {NULL, NULL, pres_int_from_float, pres_bool_from_value}, 1426 {NULL, NULL, NULL, NULL}, 1427 {pres_float_from_int, NULL, NULL, pres_bool_from_value}, 1428 {pres_float_from_bool, NULL, pres_int_from_bool, NULL} 1429 }; 1430 enum pres_value_type table_type = table_info[table].type; 1431 1432 if (param_type == table_type) 1433 { 1434 regstore_set_values(rs, table, in, offset, count); 1435 return; 1436 } 1437 1438 set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count); 1439 } 1440 1441 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1442 D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr, 1443 unsigned int start, unsigned int count) 1444 { 1445 if (type == D3DXPT_VERTEXSHADER) 1446 { 1447 switch(table) 1448 { 1449 case PRES_REGTAB_OCONST: 1450 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count); 1451 case PRES_REGTAB_OICONST: 1452 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count); 1453 case PRES_REGTAB_OBCONST: 1454 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count); 1455 default: 1456 FIXME("Unexpected register table %u.\n", table); 1457 return D3DERR_INVALIDCALL; 1458 } 1459 } 1460 else if (type == D3DXPT_PIXELSHADER) 1461 { 1462 switch(table) 1463 { 1464 case PRES_REGTAB_OCONST: 1465 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count); 1466 case PRES_REGTAB_OICONST: 1467 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count); 1468 case PRES_REGTAB_OBCONST: 1469 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count); 1470 default: 1471 FIXME("Unexpected register table %u.\n", table); 1472 return D3DERR_INVALIDCALL; 1473 } 1474 } 1475 else 1476 { 1477 FIXME("Unexpected parameter type %u.\n", type); 1478 return D3DERR_INVALIDCALL; 1479 } 1480 } 1481 1482 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab, 1483 ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1484 D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty) 1485 { 1486 unsigned int const_idx; 1487 unsigned int current_start = 0, current_count = 0; 1488 enum pres_reg_tables current_table = PRES_REGTAB_COUNT; 1489 BOOL update_device = manager || device; 1490 HRESULT hr, result = D3D_OK; 1491 ULONG64 update_version = const_tab->update_version; 1492 1493 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1494 { 1495 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1496 enum pres_reg_tables table = const_set->table; 1497 struct d3dx_parameter *param = const_set->param; 1498 unsigned int element, i, j, start_offset; 1499 struct const_upload_info info; 1500 unsigned int *data; 1501 enum pres_value_type param_type; 1502 1503 if (!(param && is_param_dirty(param, update_version))) 1504 continue; 1505 1506 data = param->data; 1507 start_offset = get_offset_reg(table, const_set->register_index); 1508 if (const_set->direct_copy) 1509 { 1510 regstore_set_values(rs, table, data, start_offset, 1511 get_offset_reg(table, const_set->register_count)); 1512 continue; 1513 } 1514 param_type = table_type_from_param_type(param->type); 1515 if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR) 1516 { 1517 unsigned int count = max(param->rows, param->columns); 1518 1519 if (count >= get_reg_components(table)) 1520 { 1521 regstore_set_data(rs, table, start_offset, data, 1522 count * const_set->element_count, param_type); 1523 } 1524 else 1525 { 1526 for (element = 0; element < const_set->element_count; ++element) 1527 regstore_set_data(rs, table, start_offset + get_offset_reg(table, element), 1528 &data[element * count], count, param_type); 1529 } 1530 continue; 1531 } 1532 get_const_upload_info(const_set, &info); 1533 for (element = 0; element < const_set->element_count; ++element) 1534 { 1535 unsigned int *out = (unsigned int *)rs->tables[table] + start_offset; 1536 1537 /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer. 1538 * All the supported types of parameters and table values have the same size. */ 1539 if (info.transpose) 1540 { 1541 for (i = 0; i < info.major_count; ++i) 1542 for (j = 0; j < info.minor; ++j) 1543 out[i * info.major_stride + j] = data[i + j * info.major]; 1544 1545 for (j = 0; j < info.minor_remainder; ++j) 1546 out[i * info.major_stride + j] = data[i + j * info.major]; 1547 } 1548 else 1549 { 1550 for (i = 0; i < info.major_count; ++i) 1551 for (j = 0; j < info.minor; ++j) 1552 out[i * info.major_stride + j] = data[i * info.minor + j]; 1553 } 1554 start_offset += get_offset_reg(table, const_set->register_count); 1555 data += param->rows * param->columns; 1556 } 1557 start_offset = get_offset_reg(table, const_set->register_index); 1558 if (table_info[table].type != param_type) 1559 regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset, 1560 get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type); 1561 } 1562 const_tab->update_version = new_update_version; 1563 if (!update_device) 1564 return D3D_OK; 1565 1566 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1567 { 1568 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1569 1570 if (device_update_all || (const_set->param 1571 ? is_param_dirty(const_set->param, update_version) : pres_dirty)) 1572 { 1573 enum pres_reg_tables table = const_set->table; 1574 1575 if (table == current_table && current_start + current_count == const_set->register_index) 1576 { 1577 current_count += const_set->register_count * const_set->element_count; 1578 } 1579 else 1580 { 1581 if (current_count) 1582 { 1583 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1584 (DWORD *)rs->tables[current_table] 1585 + get_offset_reg(current_table, current_start), current_start, current_count))) 1586 result = hr; 1587 } 1588 current_table = table; 1589 current_start = const_set->register_index; 1590 current_count = const_set->register_count * const_set->element_count; 1591 } 1592 } 1593 } 1594 if (current_count) 1595 { 1596 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1597 (DWORD *)rs->tables[current_table] 1598 + get_offset_reg(current_table, current_start), current_start, current_count))) 1599 result = hr; 1600 } 1601 return result; 1602 } 1603 1604 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset) 1605 { 1606 return regstore_get_double(rs, table, offset); 1607 } 1608 1609 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp) 1610 { 1611 unsigned int offset, base_index, reg_index, table; 1612 1613 table = opr->reg.table; 1614 1615 if (opr->index_reg.table == PRES_REGTAB_COUNT) 1616 base_index = 0; 1617 else 1618 base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset)); 1619 1620 offset = get_offset_reg(table, base_index) + opr->reg.offset + comp; 1621 reg_index = get_reg_offset(table, offset); 1622 1623 if (reg_index >= rs->table_sizes[table]) 1624 { 1625 unsigned int wrap_size; 1626 1627 if (table == PRES_REGTAB_CONST) 1628 { 1629 /* As it can be guessed from tests, offset into floating constant table is wrapped 1630 * to the nearest power of 2 and not to the actual table size. */ 1631 for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1) 1632 ; 1633 } 1634 else 1635 { 1636 wrap_size = rs->table_sizes[table]; 1637 } 1638 WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n", 1639 reg_index, table, wrap_size, rs->table_sizes[table]); 1640 reg_index %= wrap_size; 1641 1642 if (reg_index >= rs->table_sizes[table]) 1643 return 0.0; 1644 1645 offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table); 1646 } 1647 1648 return exec_get_reg_value(rs, table, offset); 1649 } 1650 1651 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg, 1652 unsigned int comp, double res) 1653 { 1654 regstore_set_double(rs, reg->table, reg->offset + comp, res); 1655 } 1656 1657 #define ARGS_ARRAY_SIZE 8 1658 static HRESULT execute_preshader(struct d3dx_preshader *pres) 1659 { 1660 unsigned int i, j, k; 1661 double args[ARGS_ARRAY_SIZE]; 1662 double res; 1663 1664 for (i = 0; i < pres->ins_count; ++i) 1665 { 1666 const struct d3dx_pres_ins *ins; 1667 const struct op_info *oi; 1668 1669 ins = &pres->ins[i]; 1670 oi = &pres_op_info[ins->op]; 1671 if (oi->func_all_comps) 1672 { 1673 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE) 1674 { 1675 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count); 1676 return E_FAIL; 1677 } 1678 for (k = 0; k < oi->input_count; ++k) 1679 for (j = 0; j < ins->component_count; ++j) 1680 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k], 1681 ins->scalar_op && !k ? 0 : j); 1682 res = oi->func(args, ins->component_count); 1683 1684 /* only 'dot' instruction currently falls here */ 1685 exec_set_arg(&pres->regs, &ins->output.reg, 0, res); 1686 } 1687 else 1688 { 1689 for (j = 0; j < ins->component_count; ++j) 1690 { 1691 for (k = 0; k < oi->input_count; ++k) 1692 args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j); 1693 res = oi->func(args, ins->component_count); 1694 exec_set_arg(&pres->regs, &ins->output.reg, j, res); 1695 } 1696 } 1697 } 1698 return D3D_OK; 1699 } 1700 1701 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version) 1702 { 1703 unsigned int i; 1704 1705 if (update_version == ULONG64_MAX) 1706 update_version = ctab->update_version; 1707 for (i = 0; i < ctab->input_count; ++i) 1708 { 1709 if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]), 1710 update_version)) 1711 return TRUE; 1712 } 1713 return FALSE; 1714 } 1715 1716 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version) 1717 { 1718 return is_const_tab_input_dirty(&peval->pres.inputs, update_version) 1719 || is_const_tab_input_dirty(&peval->shader_inputs, update_version); 1720 } 1721 1722 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param, 1723 void *param_value) 1724 { 1725 HRESULT hr; 1726 unsigned int i; 1727 unsigned int elements, elements_param, elements_table; 1728 float *oc; 1729 1730 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value); 1731 1732 if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX)) 1733 { 1734 set_constants(&peval->pres.regs, &peval->pres.inputs, 1735 next_update_version(peval->version_counter), 1736 NULL, NULL, peval->param_type, FALSE, FALSE); 1737 1738 if (FAILED(hr = execute_preshader(&peval->pres))) 1739 return hr; 1740 } 1741 1742 elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]); 1743 elements_param = param->bytes / sizeof(unsigned int); 1744 elements = min(elements_table, elements_param); 1745 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST]; 1746 for (i = 0; i < elements; ++i) 1747 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT); 1748 return D3D_OK; 1749 } 1750 1751 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1752 struct d3dx_param_eval *peval, BOOL update_all) 1753 { 1754 HRESULT hr; 1755 struct d3dx_preshader *pres = &peval->pres; 1756 struct d3dx_regstore *rs = &pres->regs; 1757 ULONG64 new_update_version = next_update_version(peval->version_counter); 1758 BOOL pres_dirty = FALSE; 1759 1760 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type); 1761 1762 if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX)) 1763 { 1764 set_constants(rs, &pres->inputs, new_update_version, 1765 NULL, NULL, peval->param_type, FALSE, FALSE); 1766 if (FAILED(hr = execute_preshader(pres))) 1767 return hr; 1768 pres_dirty = TRUE; 1769 } 1770 1771 return set_constants(rs, &peval->shader_inputs, new_update_version, 1772 manager, device, peval->param_type, update_all, pres_dirty); 1773 } 1774