1 /* 2 * Copyright 2016 Paul Gofman 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with this library; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 17 */ 18 19 #include "d3dx9_36_private.h" 20 21 #include <assert.h> 22 23 /* ReactOS FIXME: Insect */ 24 #define fmin min 25 #define fmax max 26 27 enum pres_ops 28 { 29 PRESHADER_OP_NOP, 30 PRESHADER_OP_MOV, 31 PRESHADER_OP_NEG, 32 PRESHADER_OP_RCP, 33 PRESHADER_OP_FRC, 34 PRESHADER_OP_EXP, 35 PRESHADER_OP_LOG, 36 PRESHADER_OP_RSQ, 37 PRESHADER_OP_SIN, 38 PRESHADER_OP_COS, 39 PRESHADER_OP_ASIN, 40 PRESHADER_OP_ACOS, 41 PRESHADER_OP_ATAN, 42 PRESHADER_OP_MIN, 43 PRESHADER_OP_MAX, 44 PRESHADER_OP_LT, 45 PRESHADER_OP_GE, 46 PRESHADER_OP_ADD, 47 PRESHADER_OP_MUL, 48 PRESHADER_OP_ATAN2, 49 PRESHADER_OP_DIV, 50 PRESHADER_OP_CMP, 51 PRESHADER_OP_DOT, 52 PRESHADER_OP_DOTSWIZ6, 53 PRESHADER_OP_DOTSWIZ8, 54 }; 55 56 typedef double (*pres_op_func)(double *args, int n); 57 58 static double to_signed_nan(double v) 59 { 60 static const union 61 { 62 ULONG64 ulong64_value; 63 double double_value; 64 } 65 signed_nan = 66 { 67 0xfff8000000000000 68 }; 69 70 return isnan(v) ? signed_nan.double_value : v; 71 } 72 73 static double pres_mov(double *args, int n) {return args[0];} 74 static double pres_add(double *args, int n) {return args[0] + args[1];} 75 static double pres_mul(double *args, int n) {return args[0] * args[1];} 76 static double pres_dot(double *args, int n) 77 { 78 int i; 79 double sum; 80 81 sum = 0.0; 82 for (i = 0; i < n; ++i) 83 sum += args[i] * args[i + n]; 84 return sum; 85 } 86 87 static double pres_dotswiz6(double *args, int n) 88 { 89 return pres_dot(args, 3); 90 } 91 92 static double pres_dotswiz8(double *args, int n) 93 { 94 return pres_dot(args, 4); 95 } 96 97 static double pres_neg(double *args, int n) {return -args[0];} 98 static double pres_rcp(double *args, int n) {return 1.0 / args[0];} 99 static double pres_lt(double *args, int n) {return args[0] < args[1] ? 1.0 : 0.0;} 100 static double pres_ge(double *args, int n) {return args[0] >= args[1] ? 1.0 : 0.0;} 101 static double pres_frc(double *args, int n) {return args[0] - floor(args[0]);} 102 static double pres_min(double *args, int n) {return fmin(args[0], args[1]);} 103 static double pres_max(double *args, int n) {return fmax(args[0], args[1]);} 104 static double pres_cmp(double *args, int n) {return args[0] >= 0.0 ? args[1] : args[2];} 105 static double pres_sin(double *args, int n) {return sin(args[0]);} 106 static double pres_cos(double *args, int n) {return cos(args[0]);} 107 static double pres_rsq(double *args, int n) 108 { 109 double v; 110 111 v = fabs(args[0]); 112 if (v == 0.0) 113 return INFINITY; 114 else 115 return 1.0 / sqrt(v); 116 } 117 static double pres_exp(double *args, int n) {return pow(2.0, args[0]);} 118 static double pres_log(double *args, int n) 119 { 120 double v; 121 122 v = fabs(args[0]); 123 if (v == 0.0) 124 return 0.0; 125 else 126 #ifdef HAVE_LOG2 127 return log2(v); 128 #else 129 return log(v) / log(2); 130 #endif 131 } 132 static double pres_asin(double *args, int n) {return to_signed_nan(asin(args[0]));} 133 static double pres_acos(double *args, int n) {return to_signed_nan(acos(args[0]));} 134 static double pres_atan(double *args, int n) {return atan(args[0]);} 135 static double pres_atan2(double *args, int n) {return atan2(args[0], args[1]);} 136 137 /* According to the test results 'div' operation always returns 0. Compiler does not seem to ever 138 * generate it, using rcp + mul instead, so probably it is not implemented in native d3dx. */ 139 static double pres_div(double *args, int n) {return 0.0;} 140 141 #define PRES_OPCODE_MASK 0x7ff00000 142 #define PRES_OPCODE_SHIFT 20 143 #define PRES_SCALAR_FLAG 0x80000000 144 #define PRES_NCOMP_MASK 0x0000ffff 145 146 #define FOURCC_PRES 0x53455250 147 #define FOURCC_CLIT 0x54494c43 148 #define FOURCC_FXLC 0x434c5846 149 #define FOURCC_PRSI 0x49535250 150 #define PRES_SIGN 0x46580000 151 152 struct op_info 153 { 154 unsigned int opcode; 155 char mnem[16]; 156 unsigned int input_count; 157 BOOL func_all_comps; 158 pres_op_func func; 159 }; 160 161 static const struct op_info pres_op_info[] = 162 { 163 {0x000, "nop", 0, 0, NULL }, /* PRESHADER_OP_NOP */ 164 {0x100, "mov", 1, 0, pres_mov}, /* PRESHADER_OP_MOV */ 165 {0x101, "neg", 1, 0, pres_neg}, /* PRESHADER_OP_NEG */ 166 {0x103, "rcp", 1, 0, pres_rcp}, /* PRESHADER_OP_RCP */ 167 {0x104, "frc", 1, 0, pres_frc}, /* PRESHADER_OP_FRC */ 168 {0x105, "exp", 1, 0, pres_exp}, /* PRESHADER_OP_EXP */ 169 {0x106, "log", 1, 0, pres_log}, /* PRESHADER_OP_LOG */ 170 {0x107, "rsq", 1, 0, pres_rsq}, /* PRESHADER_OP_RSQ */ 171 {0x108, "sin", 1, 0, pres_sin}, /* PRESHADER_OP_SIN */ 172 {0x109, "cos", 1, 0, pres_cos}, /* PRESHADER_OP_COS */ 173 {0x10a, "asin", 1, 0, pres_asin}, /* PRESHADER_OP_ASIN */ 174 {0x10b, "acos", 1, 0, pres_acos}, /* PRESHADER_OP_ACOS */ 175 {0x10c, "atan", 1, 0, pres_atan}, /* PRESHADER_OP_ATAN */ 176 {0x200, "min", 2, 0, pres_min}, /* PRESHADER_OP_MIN */ 177 {0x201, "max", 2, 0, pres_max}, /* PRESHADER_OP_MAX */ 178 {0x202, "lt", 2, 0, pres_lt }, /* PRESHADER_OP_LT */ 179 {0x203, "ge", 2, 0, pres_ge }, /* PRESHADER_OP_GE */ 180 {0x204, "add", 2, 0, pres_add}, /* PRESHADER_OP_ADD */ 181 {0x205, "mul", 2, 0, pres_mul}, /* PRESHADER_OP_MUL */ 182 {0x206, "atan2", 2, 0, pres_atan2}, /* PRESHADER_OP_ATAN2 */ 183 {0x208, "div", 2, 0, pres_div}, /* PRESHADER_OP_DIV */ 184 {0x300, "cmp", 3, 0, pres_cmp}, /* PRESHADER_OP_CMP */ 185 {0x500, "dot", 2, 1, pres_dot}, /* PRESHADER_OP_DOT */ 186 {0x70e, "d3ds_dotswiz", 6, 0, pres_dotswiz6}, /* PRESHADER_OP_DOTSWIZ6 */ 187 {0x70e, "d3ds_dotswiz", 8, 0, pres_dotswiz8}, /* PRESHADER_OP_DOTSWIZ8 */ 188 }; 189 190 enum pres_value_type 191 { 192 PRES_VT_FLOAT, 193 PRES_VT_DOUBLE, 194 PRES_VT_INT, 195 PRES_VT_BOOL, 196 PRES_VT_COUNT 197 }; 198 199 static const struct 200 { 201 unsigned int component_size; 202 enum pres_value_type type; 203 } 204 table_info[] = 205 { 206 {sizeof(double), PRES_VT_DOUBLE}, /* PRES_REGTAB_IMMED */ 207 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_CONST */ 208 {sizeof(float), PRES_VT_FLOAT }, /* PRES_REGTAB_OCONST */ 209 {sizeof(BOOL), PRES_VT_BOOL }, /* PRES_REGTAB_OBCONST */ 210 {sizeof(int), PRES_VT_INT, }, /* PRES_REGTAB_OICONST */ 211 /* TODO: use double precision for 64 bit */ 212 {sizeof(float), PRES_VT_FLOAT } /* PRES_REGTAB_TEMP */ 213 }; 214 215 static const char *table_symbol[] = 216 { 217 "imm", "c", "oc", "ob", "oi", "r", "(null)", 218 }; 219 220 static const enum pres_reg_tables pres_regset2table[] = 221 { 222 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 223 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 224 PRES_REGTAB_CONST, /* D3DXRS_FLOAT4 */ 225 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 226 }; 227 228 static const enum pres_reg_tables shad_regset2table[] = 229 { 230 PRES_REGTAB_OBCONST, /* D3DXRS_BOOL */ 231 PRES_REGTAB_OICONST, /* D3DXRS_INT4 */ 232 PRES_REGTAB_OCONST, /* D3DXRS_FLOAT4 */ 233 PRES_REGTAB_COUNT, /* D3DXRS_SAMPLER */ 234 }; 235 236 struct d3dx_pres_reg 237 { 238 enum pres_reg_tables table; 239 /* offset is component index, not register index, e. g. 240 offset for component c3.y is 13 (3 * 4 + 1) */ 241 unsigned int offset; 242 }; 243 244 struct d3dx_pres_operand 245 { 246 struct d3dx_pres_reg reg; 247 struct d3dx_pres_reg index_reg; 248 }; 249 250 #define MAX_INPUTS_COUNT 8 251 252 struct d3dx_pres_ins 253 { 254 enum pres_ops op; 255 /* first input argument is scalar, 256 scalar component is propagated */ 257 BOOL scalar_op; 258 unsigned int component_count; 259 struct d3dx_pres_operand inputs[MAX_INPUTS_COUNT]; 260 struct d3dx_pres_operand output; 261 }; 262 263 struct const_upload_info 264 { 265 BOOL transpose; 266 unsigned int major, minor; 267 unsigned int major_stride; 268 unsigned int major_count; 269 unsigned int count; 270 unsigned int minor_remainder; 271 }; 272 273 static enum pres_value_type table_type_from_param_type(D3DXPARAMETER_TYPE type) 274 { 275 switch (type) 276 { 277 case D3DXPT_FLOAT: 278 return PRES_VT_FLOAT; 279 case D3DXPT_INT: 280 return PRES_VT_INT; 281 case D3DXPT_BOOL: 282 return PRES_VT_BOOL; 283 default: 284 FIXME("Unsupported type %u.\n", type); 285 return PRES_VT_COUNT; 286 } 287 } 288 289 static unsigned int get_reg_offset(unsigned int table, unsigned int offset) 290 { 291 return table == PRES_REGTAB_OBCONST ? offset : offset >> 2; 292 } 293 294 static unsigned int get_offset_reg(unsigned int table, unsigned int reg_idx) 295 { 296 return table == PRES_REGTAB_OBCONST ? reg_idx : reg_idx << 2; 297 } 298 299 static unsigned int get_reg_components(unsigned int table) 300 { 301 return get_offset_reg(table, 1); 302 } 303 304 #define PRES_BITMASK_BLOCK_SIZE (sizeof(unsigned int) * 8) 305 306 static HRESULT regstore_alloc_table(struct d3dx_regstore *rs, unsigned int table) 307 { 308 unsigned int size; 309 310 size = get_offset_reg(table, rs->table_sizes[table]) * table_info[table].component_size; 311 if (size) 312 { 313 rs->tables[table] = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size); 314 if (!rs->tables[table]) 315 return E_OUTOFMEMORY; 316 } 317 return D3D_OK; 318 } 319 320 static void regstore_free_tables(struct d3dx_regstore *rs) 321 { 322 unsigned int i; 323 324 for (i = 0; i < PRES_REGTAB_COUNT; ++i) 325 { 326 HeapFree(GetProcessHeap(), 0, rs->tables[i]); 327 } 328 } 329 330 static void regstore_set_values(struct d3dx_regstore *rs, unsigned int table, const void *data, 331 unsigned int start_offset, unsigned int count) 332 { 333 BYTE *dst = rs->tables[table]; 334 const BYTE *src = data; 335 unsigned int size; 336 337 dst += start_offset * table_info[table].component_size; 338 size = count * table_info[table].component_size; 339 assert((src < dst && size <= dst - src) || (src > dst && size <= src - dst)); 340 memcpy(dst, src, size); 341 } 342 343 static double regstore_get_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset) 344 { 345 BYTE *p; 346 347 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 348 switch (table_info[table].type) 349 { 350 case PRES_VT_FLOAT: 351 return *(float *)p; 352 case PRES_VT_DOUBLE: 353 return *(double *)p; 354 default: 355 FIXME("Unexpected preshader input from table %u.\n", table); 356 return NAN; 357 } 358 } 359 360 static void regstore_set_double(struct d3dx_regstore *rs, unsigned int table, unsigned int offset, double v) 361 { 362 BYTE *p; 363 364 p = (BYTE *)rs->tables[table] + table_info[table].component_size * offset; 365 switch (table_info[table].type) 366 { 367 case PRES_VT_FLOAT : *(float *)p = v; break; 368 case PRES_VT_DOUBLE: *(double *)p = v; break; 369 case PRES_VT_INT : *(int *)p = lrint(v); break; 370 case PRES_VT_BOOL : *(BOOL *)p = !!v; break; 371 default: 372 FIXME("Bad type %u.\n", table_info[table].type); 373 break; 374 } 375 } 376 377 static void dump_bytecode(void *data, unsigned int size) 378 { 379 unsigned int *bytecode = (unsigned int *)data; 380 unsigned int i, j, n; 381 382 size /= sizeof(*bytecode); 383 i = 0; 384 while (i < size) 385 { 386 n = min(size - i, 8); 387 for (j = 0; j < n; ++j) 388 TRACE("0x%08x,", bytecode[i + j]); 389 i += n; 390 TRACE("\n"); 391 } 392 } 393 394 static unsigned int *find_bytecode_comment(unsigned int *ptr, unsigned int count, 395 unsigned int fourcc, unsigned int *size) 396 { 397 /* Provide at least one value in comment section on non-NULL return. */ 398 while (count > 2 && (*ptr & 0xffff) == 0xfffe) 399 { 400 unsigned int section_size; 401 402 section_size = (*ptr >> 16); 403 if (!section_size || section_size + 1 > count) 404 break; 405 if (*(ptr + 1) == fourcc) 406 { 407 *size = section_size; 408 return ptr + 2; 409 } 410 count -= section_size + 1; 411 ptr += section_size + 1; 412 } 413 return NULL; 414 } 415 416 static unsigned int *parse_pres_reg(unsigned int *ptr, struct d3dx_pres_reg *reg) 417 { 418 static const enum pres_reg_tables reg_table[8] = 419 { 420 PRES_REGTAB_COUNT, PRES_REGTAB_IMMED, PRES_REGTAB_CONST, PRES_REGTAB_COUNT, 421 PRES_REGTAB_OCONST, PRES_REGTAB_OBCONST, PRES_REGTAB_OICONST, PRES_REGTAB_TEMP 422 }; 423 424 if (*ptr >= ARRAY_SIZE(reg_table) || reg_table[*ptr] == PRES_REGTAB_COUNT) 425 { 426 FIXME("Unsupported register table %#x.\n", *ptr); 427 return NULL; 428 } 429 430 reg->table = reg_table[*ptr++]; 431 reg->offset = *ptr++; 432 return ptr; 433 } 434 435 static unsigned int *parse_pres_arg(unsigned int *ptr, unsigned int count, struct d3dx_pres_operand *opr) 436 { 437 if (count < 3 || (*ptr && count < 5)) 438 { 439 WARN("Byte code buffer ends unexpectedly, count %u.\n", count); 440 return NULL; 441 } 442 443 if (*ptr) 444 { 445 if (*ptr != 1) 446 { 447 FIXME("Unknown relative addressing flag, word %#x.\n", *ptr); 448 return NULL; 449 } 450 ptr = parse_pres_reg(ptr + 1, &opr->index_reg); 451 if (!ptr) 452 return NULL; 453 } 454 else 455 { 456 opr->index_reg.table = PRES_REGTAB_COUNT; 457 ++ptr; 458 } 459 460 ptr = parse_pres_reg(ptr, &opr->reg); 461 462 if (opr->reg.table == PRES_REGTAB_OBCONST) 463 opr->reg.offset /= 4; 464 return ptr; 465 } 466 467 static unsigned int *parse_pres_ins(unsigned int *ptr, unsigned int count, struct d3dx_pres_ins *ins) 468 { 469 unsigned int ins_code, ins_raw; 470 unsigned int input_count; 471 unsigned int i; 472 473 if (count < 2) 474 { 475 WARN("Byte code buffer ends unexpectedly.\n"); 476 return NULL; 477 } 478 479 ins_raw = *ptr++; 480 ins_code = (ins_raw & PRES_OPCODE_MASK) >> PRES_OPCODE_SHIFT; 481 ins->component_count = ins_raw & PRES_NCOMP_MASK; 482 ins->scalar_op = !!(ins_raw & PRES_SCALAR_FLAG); 483 484 if (ins->component_count < 1 || ins->component_count > 4) 485 { 486 FIXME("Unsupported number of components %u.\n", ins->component_count); 487 return NULL; 488 } 489 input_count = *ptr++; 490 count -= 2; 491 for (i = 0; i < ARRAY_SIZE(pres_op_info); ++i) 492 if (ins_code == pres_op_info[i].opcode && input_count == pres_op_info[i].input_count) 493 break; 494 if (i == ARRAY_SIZE(pres_op_info)) 495 { 496 FIXME("Unknown opcode %#x, input_count %u, raw %#x.\n", ins_code, input_count, ins_raw); 497 return NULL; 498 } 499 ins->op = i; 500 if (input_count > ARRAY_SIZE(ins->inputs)) 501 { 502 FIXME("Actual input args count %u exceeds inputs array size, instruction %s.\n", input_count, 503 pres_op_info[i].mnem); 504 return NULL; 505 } 506 for (i = 0; i < input_count; ++i) 507 { 508 unsigned int *p; 509 510 p = parse_pres_arg(ptr, count, &ins->inputs[i]); 511 if (!p) 512 return NULL; 513 count -= p - ptr; 514 ptr = p; 515 } 516 ptr = parse_pres_arg(ptr, count, &ins->output); 517 if (ins->output.index_reg.table != PRES_REGTAB_COUNT) 518 { 519 FIXME("Relative addressing in output register not supported.\n"); 520 return NULL; 521 } 522 if (get_reg_offset(ins->output.reg.table, ins->output.reg.offset 523 + (pres_op_info[ins->op].func_all_comps ? 0 : ins->component_count - 1)) 524 != get_reg_offset(ins->output.reg.table, ins->output.reg.offset)) 525 { 526 FIXME("Instructions outputting multiple registers are not supported.\n"); 527 return NULL; 528 } 529 return ptr; 530 } 531 532 static HRESULT get_ctab_constant_desc(ID3DXConstantTable *ctab, D3DXHANDLE hc, D3DXCONSTANT_DESC *desc, 533 WORD *constantinfo_reserved) 534 { 535 const struct ctab_constant *constant = d3dx_shader_get_ctab_constant(ctab, hc); 536 537 if (!constant) 538 { 539 FIXME("Could not get constant desc.\n"); 540 return D3DERR_INVALIDCALL; 541 } 542 *desc = constant->desc; 543 if (constantinfo_reserved) 544 *constantinfo_reserved = constant->constantinfo_reserved; 545 return D3D_OK; 546 } 547 548 static void get_const_upload_info(struct d3dx_const_param_eval_output *const_set, 549 struct const_upload_info *info) 550 { 551 struct d3dx_parameter *param = const_set->param; 552 unsigned int table = const_set->table; 553 554 info->transpose = (const_set->constant_class == D3DXPC_MATRIX_COLUMNS && param->class == D3DXPC_MATRIX_ROWS) 555 || (param->class == D3DXPC_MATRIX_COLUMNS && const_set->constant_class == D3DXPC_MATRIX_ROWS); 556 if (const_set->constant_class == D3DXPC_MATRIX_COLUMNS) 557 { 558 info->major = param->columns; 559 info->minor = param->rows; 560 } 561 else 562 { 563 info->major = param->rows; 564 info->minor = param->columns; 565 } 566 567 if (get_reg_components(table) == 1) 568 { 569 unsigned int const_length = get_offset_reg(table, const_set->register_count); 570 571 info->major_stride = info->minor; 572 info->major_count = const_length / info->major_stride; 573 info->minor_remainder = const_length % info->major_stride; 574 } 575 else 576 { 577 info->major_stride = get_reg_components(table); 578 info->major_count = const_set->register_count; 579 info->minor_remainder = 0; 580 } 581 info->count = info->major_count * info->minor + info->minor_remainder; 582 } 583 584 #define INITIAL_CONST_SET_SIZE 16 585 586 static HRESULT append_const_set(struct d3dx_const_tab *const_tab, struct d3dx_const_param_eval_output *set) 587 { 588 if (const_tab->const_set_count >= const_tab->const_set_size) 589 { 590 unsigned int new_size; 591 struct d3dx_const_param_eval_output *new_alloc; 592 593 if (!const_tab->const_set_size) 594 { 595 new_size = INITIAL_CONST_SET_SIZE; 596 new_alloc = HeapAlloc(GetProcessHeap(), 0, sizeof(*const_tab->const_set) * new_size); 597 if (!new_alloc) 598 { 599 ERR("Out of memory.\n"); 600 return E_OUTOFMEMORY; 601 } 602 } 603 else 604 { 605 new_size = const_tab->const_set_size * 2; 606 new_alloc = HeapReAlloc(GetProcessHeap(), 0, const_tab->const_set, 607 sizeof(*const_tab->const_set) * new_size); 608 if (!new_alloc) 609 { 610 ERR("Out of memory.\n"); 611 return E_OUTOFMEMORY; 612 } 613 } 614 const_tab->const_set = new_alloc; 615 const_tab->const_set_size = new_size; 616 } 617 const_tab->const_set[const_tab->const_set_count++] = *set; 618 return D3D_OK; 619 } 620 621 static void append_pres_const_sets_for_shader_input(struct d3dx_const_tab *const_tab, 622 struct d3dx_preshader *pres) 623 { 624 unsigned int i; 625 struct d3dx_const_param_eval_output const_set = {NULL}; 626 627 for (i = 0; i < pres->ins_count; ++i) 628 { 629 const struct d3dx_pres_ins *ins = &pres->ins[i]; 630 const struct d3dx_pres_reg *reg = &ins->output.reg; 631 632 if (reg->table == PRES_REGTAB_TEMP) 633 continue; 634 635 const_set.register_index = get_reg_offset(reg->table, reg->offset); 636 const_set.register_count = 1; 637 const_set.table = reg->table; 638 const_set.constant_class = D3DXPC_FORCE_DWORD; 639 const_set.element_count = 1; 640 append_const_set(const_tab, &const_set); 641 } 642 } 643 644 static int compare_const_set(const void *a, const void *b) 645 { 646 const struct d3dx_const_param_eval_output *r1 = a; 647 const struct d3dx_const_param_eval_output *r2 = b; 648 649 if (r1->table != r2->table) 650 return r1->table - r2->table; 651 return r1->register_index - r2->register_index; 652 } 653 654 static HRESULT merge_const_set_entries(struct d3dx_const_tab *const_tab, 655 struct d3dx_parameter *param, unsigned int index) 656 { 657 unsigned int i, start_index = index; 658 DWORD *current_data; 659 enum pres_reg_tables current_table; 660 unsigned int current_start_offset, element_count; 661 struct d3dx_const_param_eval_output *first_const; 662 663 if (!const_tab->const_set_count) 664 return D3D_OK; 665 666 while (index < const_tab->const_set_count - 1) 667 { 668 first_const = &const_tab->const_set[index]; 669 current_data = first_const->param->data; 670 current_table = first_const->table; 671 current_start_offset = get_offset_reg(current_table, first_const->register_index); 672 element_count = 0; 673 for (i = index; i < const_tab->const_set_count; ++i) 674 { 675 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[i]; 676 unsigned int count = get_offset_reg(const_set->table, 677 const_set->register_count * const_set->element_count); 678 unsigned int start_offset = get_offset_reg(const_set->table, const_set->register_index); 679 680 if (!(const_set->table == current_table && current_start_offset == start_offset 681 && const_set->direct_copy == first_const->direct_copy 682 && current_data == const_set->param->data 683 && (const_set->direct_copy || (first_const->param->type == const_set->param->type 684 && first_const->param->class == const_set->param->class 685 && first_const->param->columns == const_set->param->columns 686 && first_const->param->rows == const_set->param->rows 687 && first_const->register_count == const_set->register_count 688 && (i == const_tab->const_set_count - 1 689 || first_const->param->element_count == const_set->param->element_count))))) 690 break; 691 692 current_start_offset += count; 693 current_data += const_set->direct_copy ? count : const_set->param->rows 694 * const_set->param->columns * const_set->element_count; 695 element_count += const_set->element_count; 696 } 697 698 if (i > index + 1) 699 { 700 TRACE("Merging %u child parameters for %s, not merging %u, direct_copy %#x.\n", i - index, 701 debugstr_a(param->name), const_tab->const_set_count - i, first_const->direct_copy); 702 703 first_const->element_count = element_count; 704 if (first_const->direct_copy) 705 { 706 first_const->element_count = 1; 707 if (index == start_index 708 && !(param->type == D3DXPT_VOID && param->class == D3DXPC_STRUCT)) 709 { 710 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 711 return D3DERR_INVALIDCALL; 712 first_const->param = param; 713 } 714 first_const->register_count = get_reg_offset(current_table, current_start_offset) 715 - first_const->register_index; 716 } 717 memmove(&const_tab->const_set[index + 1], &const_tab->const_set[i], 718 sizeof(*const_tab->const_set) * (const_tab->const_set_count - i)); 719 const_tab->const_set_count -= i - index - 1; 720 } 721 else 722 { 723 TRACE("Not merging %u child parameters for %s, direct_copy %#x.\n", 724 const_tab->const_set_count - i, debugstr_a(param->name), first_const->direct_copy); 725 } 726 index = i; 727 } 728 return D3D_OK; 729 } 730 731 static HRESULT init_set_constants_param(struct d3dx_const_tab *const_tab, ID3DXConstantTable *ctab, 732 D3DXHANDLE hc, struct d3dx_parameter *param) 733 { 734 D3DXCONSTANT_DESC desc; 735 unsigned int const_count, param_count, i; 736 BOOL get_element; 737 struct d3dx_const_param_eval_output const_set; 738 struct const_upload_info info; 739 enum pres_value_type table_type; 740 HRESULT hr; 741 742 if (FAILED(get_ctab_constant_desc(ctab, hc, &desc, NULL))) 743 return D3DERR_INVALIDCALL; 744 745 if (param->element_count) 746 { 747 param_count = param->element_count; 748 const_count = desc.Elements; 749 get_element = TRUE; 750 } 751 else 752 { 753 if (desc.Elements > 1) 754 { 755 FIXME("Unexpected number of constant elements %u.\n", desc.Elements); 756 return D3DERR_INVALIDCALL; 757 } 758 param_count = param->member_count; 759 const_count = desc.StructMembers; 760 get_element = FALSE; 761 } 762 if (const_count != param_count) 763 { 764 FIXME("Number of elements or struct members differs between parameter (%u) and constant (%u).\n", 765 param_count, const_count); 766 return D3DERR_INVALIDCALL; 767 } 768 if (const_count) 769 { 770 HRESULT ret = D3D_OK; 771 D3DXHANDLE hc_element; 772 unsigned int index = const_tab->const_set_count; 773 774 for (i = 0; i < const_count; ++i) 775 { 776 if (get_element) 777 hc_element = ID3DXConstantTable_GetConstantElement(ctab, hc, i); 778 else 779 hc_element = ID3DXConstantTable_GetConstant(ctab, hc, i); 780 if (!hc_element) 781 { 782 FIXME("Could not get constant.\n"); 783 hr = D3DERR_INVALIDCALL; 784 } 785 else 786 { 787 hr = init_set_constants_param(const_tab, ctab, hc_element, ¶m->members[i]); 788 } 789 if (FAILED(hr)) 790 ret = hr; 791 } 792 if (FAILED(ret)) 793 return ret; 794 return merge_const_set_entries(const_tab, param, index); 795 } 796 797 TRACE("Constant %s, rows %u, columns %u, class %u, bytes %u.\n", 798 debugstr_a(desc.Name), desc.Rows, desc.Columns, desc.Class, desc.Bytes); 799 TRACE("Parameter %s, rows %u, columns %u, class %u, flags %#x, bytes %u.\n", 800 debugstr_a(param->name), param->rows, param->columns, param->class, 801 param->flags, param->bytes); 802 803 const_set.element_count = 1; 804 const_set.param = param; 805 const_set.constant_class = desc.Class; 806 if (desc.RegisterSet >= ARRAY_SIZE(shad_regset2table)) 807 { 808 FIXME("Unknown register set %u.\n", desc.RegisterSet); 809 return D3DERR_INVALIDCALL; 810 } 811 const_set.register_index = desc.RegisterIndex; 812 const_set.table = const_tab->regset2table[desc.RegisterSet]; 813 if (const_set.table >= PRES_REGTAB_COUNT) 814 { 815 ERR("Unexpected register set %u.\n", desc.RegisterSet); 816 return D3DERR_INVALIDCALL; 817 } 818 assert(table_info[const_set.table].component_size == sizeof(unsigned int)); 819 assert(param->bytes / (param->rows * param->columns) == sizeof(unsigned int)); 820 const_set.register_count = desc.RegisterCount; 821 table_type = table_info[const_set.table].type; 822 get_const_upload_info(&const_set, &info); 823 if (!info.count) 824 { 825 TRACE("%s has zero count, skipping.\n", debugstr_a(param->name)); 826 return D3D_OK; 827 } 828 829 if (table_type_from_param_type(param->type) == PRES_VT_COUNT) 830 return D3DERR_INVALIDCALL; 831 832 const_set.direct_copy = table_type_from_param_type(param->type) == table_type 833 && !info.transpose && info.minor == info.major_stride 834 && info.count == get_offset_reg(const_set.table, const_set.register_count) 835 && info.count * sizeof(unsigned int) <= param->bytes; 836 if (info.minor_remainder && !const_set.direct_copy && !info.transpose) 837 FIXME("Incomplete last row for not transposed matrix which cannot be directly copied, parameter %s.\n", 838 debugstr_a(param->name)); 839 840 if (info.major_count > info.major 841 || (info.major_count == info.major && info.minor_remainder)) 842 { 843 WARN("Constant dimensions exceed parameter size.\n"); 844 return D3DERR_INVALIDCALL; 845 } 846 847 if (FAILED(hr = append_const_set(const_tab, &const_set))) 848 return hr; 849 850 return D3D_OK; 851 } 852 853 static HRESULT get_constants_desc(unsigned int *byte_code, struct d3dx_const_tab *out, 854 struct d3dx9_base_effect *base, const char **skip_constants, 855 unsigned int skip_constants_count, struct d3dx_preshader *pres) 856 { 857 ID3DXConstantTable *ctab; 858 D3DXCONSTANT_DESC *cdesc; 859 struct d3dx_parameter **inputs_param; 860 D3DXCONSTANTTABLE_DESC desc; 861 HRESULT hr; 862 D3DXHANDLE hc; 863 unsigned int i, j; 864 865 hr = D3DXGetShaderConstantTable(byte_code, &ctab); 866 if (FAILED(hr) || !ctab) 867 { 868 TRACE("Could not get CTAB data, hr %#x.\n", hr); 869 /* returning OK, shaders and preshaders without CTAB are valid */ 870 return D3D_OK; 871 } 872 if (FAILED(hr = ID3DXConstantTable_GetDesc(ctab, &desc))) 873 { 874 FIXME("Could not get CTAB desc, hr %#x.\n", hr); 875 goto cleanup; 876 } 877 878 out->inputs = cdesc = HeapAlloc(GetProcessHeap(), 0, sizeof(*cdesc) * desc.Constants); 879 out->inputs_param = inputs_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*inputs_param) * desc.Constants); 880 if (!cdesc || !inputs_param) 881 { 882 hr = E_OUTOFMEMORY; 883 goto cleanup; 884 } 885 886 for (i = 0; i < desc.Constants; ++i) 887 { 888 unsigned int index = out->input_count; 889 WORD constantinfo_reserved; 890 891 hc = ID3DXConstantTable_GetConstant(ctab, NULL, i); 892 if (!hc) 893 { 894 FIXME("Null constant handle.\n"); 895 goto cleanup; 896 } 897 if (FAILED(hr = get_ctab_constant_desc(ctab, hc, &cdesc[index], &constantinfo_reserved))) 898 goto cleanup; 899 inputs_param[index] = get_parameter_by_name(base, NULL, cdesc[index].Name); 900 if (!inputs_param[index]) 901 { 902 WARN("Could not find parameter %s in effect.\n", cdesc[index].Name); 903 continue; 904 } 905 if (cdesc[index].Class == D3DXPC_OBJECT) 906 { 907 TRACE("Object %s, parameter %p.\n", cdesc[index].Name, inputs_param[index]); 908 if (cdesc[index].RegisterSet != D3DXRS_SAMPLER || inputs_param[index]->class != D3DXPC_OBJECT 909 || !is_param_type_sampler(inputs_param[index]->type)) 910 { 911 WARN("Unexpected object type, constant %s.\n", debugstr_a(cdesc[index].Name)); 912 hr = D3DERR_INVALIDCALL; 913 goto cleanup; 914 } 915 if (max(inputs_param[index]->element_count, 1) < cdesc[index].RegisterCount) 916 { 917 WARN("Register count exceeds parameter size, constant %s.\n", debugstr_a(cdesc[index].Name)); 918 hr = D3DERR_INVALIDCALL; 919 goto cleanup; 920 } 921 } 922 if (!is_top_level_parameter(inputs_param[index])) 923 { 924 WARN("Expected top level parameter '%s'.\n", debugstr_a(cdesc[index].Name)); 925 hr = E_FAIL; 926 goto cleanup; 927 } 928 929 for (j = 0; j < skip_constants_count; ++j) 930 { 931 if (!strcmp(cdesc[index].Name, skip_constants[j])) 932 { 933 if (!constantinfo_reserved) 934 { 935 WARN("skip_constants parameter %s is not register bound.\n", 936 cdesc[index].Name); 937 hr = D3DERR_INVALIDCALL; 938 goto cleanup; 939 } 940 TRACE("Skipping constant %s.\n", cdesc[index].Name); 941 break; 942 } 943 } 944 if (j < skip_constants_count) 945 continue; 946 ++out->input_count; 947 if (inputs_param[index]->class == D3DXPC_OBJECT) 948 continue; 949 if (FAILED(hr = init_set_constants_param(out, ctab, hc, inputs_param[index]))) 950 goto cleanup; 951 } 952 if (pres) 953 append_pres_const_sets_for_shader_input(out, pres); 954 if (out->const_set_count) 955 { 956 struct d3dx_const_param_eval_output *new_alloc; 957 958 qsort(out->const_set, out->const_set_count, sizeof(*out->const_set), compare_const_set); 959 960 i = 0; 961 while (i < out->const_set_count - 1) 962 { 963 if (out->const_set[i].constant_class == D3DXPC_FORCE_DWORD 964 && out->const_set[i + 1].constant_class == D3DXPC_FORCE_DWORD 965 && out->const_set[i].table == out->const_set[i + 1].table 966 && out->const_set[i].register_index + out->const_set[i].register_count 967 >= out->const_set[i + 1].register_index) 968 { 969 assert(out->const_set[i].register_index + out->const_set[i].register_count 970 <= out->const_set[i + 1].register_index + 1); 971 out->const_set[i].register_count = out->const_set[i + 1].register_index + 1 972 - out->const_set[i].register_index; 973 memmove(&out->const_set[i + 1], &out->const_set[i + 2], sizeof(out->const_set[i]) 974 * (out->const_set_count - i - 2)); 975 --out->const_set_count; 976 } 977 else 978 { 979 ++i; 980 } 981 } 982 983 new_alloc = HeapReAlloc(GetProcessHeap(), 0, out->const_set, 984 sizeof(*out->const_set) * out->const_set_count); 985 if (new_alloc) 986 { 987 out->const_set = new_alloc; 988 out->const_set_size = out->const_set_count; 989 } 990 else 991 { 992 WARN("Out of memory.\n"); 993 } 994 } 995 cleanup: 996 ID3DXConstantTable_Release(ctab); 997 return hr; 998 } 999 1000 static void update_table_size(unsigned int *table_sizes, unsigned int table, unsigned int max_register) 1001 { 1002 if (table < PRES_REGTAB_COUNT) 1003 table_sizes[table] = max(table_sizes[table], max_register + 1); 1004 } 1005 1006 static void update_table_sizes_consts(unsigned int *table_sizes, struct d3dx_const_tab *ctab) 1007 { 1008 unsigned int i, table, max_register; 1009 1010 for (i = 0; i < ctab->input_count; ++i) 1011 { 1012 if (!ctab->inputs[i].RegisterCount) 1013 continue; 1014 max_register = ctab->inputs[i].RegisterIndex + ctab->inputs[i].RegisterCount - 1; 1015 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1016 update_table_size(table_sizes, table, max_register); 1017 } 1018 } 1019 1020 static void dump_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *arg, int component_count) 1021 { 1022 static const char *xyzw_str = "xyzw"; 1023 unsigned int i, table; 1024 1025 table = arg->reg.table; 1026 if (table == PRES_REGTAB_IMMED && arg->index_reg.table == PRES_REGTAB_COUNT) 1027 { 1028 TRACE("("); 1029 for (i = 0; i < component_count; ++i) 1030 TRACE(i < component_count - 1 ? "%.16e, " : "%.16e", 1031 ((double *)rs->tables[PRES_REGTAB_IMMED])[arg->reg.offset + i]); 1032 TRACE(")"); 1033 } 1034 else 1035 { 1036 if (arg->index_reg.table == PRES_REGTAB_COUNT) 1037 { 1038 TRACE("%s%u.", table_symbol[table], get_reg_offset(table, arg->reg.offset)); 1039 } 1040 else 1041 { 1042 unsigned int index_reg; 1043 1044 index_reg = get_reg_offset(arg->index_reg.table, arg->index_reg.offset); 1045 TRACE("%s[%u + %s%u.%c].", table_symbol[table], get_reg_offset(table, arg->reg.offset), 1046 table_symbol[arg->index_reg.table], index_reg, 1047 xyzw_str[arg->index_reg.offset - get_offset_reg(arg->index_reg.table, index_reg)]); 1048 } 1049 for (i = 0; i < component_count; ++i) 1050 TRACE("%c", xyzw_str[(arg->reg.offset + i) % 4]); 1051 } 1052 } 1053 1054 static void dump_registers(struct d3dx_const_tab *ctab) 1055 { 1056 unsigned int table, i; 1057 1058 for (i = 0; i < ctab->input_count; ++i) 1059 { 1060 table = ctab->regset2table[ctab->inputs[i].RegisterSet]; 1061 TRACE("// %-12s %s%-4u %u\n", ctab->inputs_param[i] ? ctab->inputs_param[i]->name : "(nil)", 1062 table_symbol[table], ctab->inputs[i].RegisterIndex, ctab->inputs[i].RegisterCount); 1063 } 1064 } 1065 1066 static void dump_ins(struct d3dx_regstore *rs, const struct d3dx_pres_ins *ins) 1067 { 1068 unsigned int i; 1069 1070 TRACE("%s ", pres_op_info[ins->op].mnem); 1071 dump_arg(rs, &ins->output, pres_op_info[ins->op].func_all_comps ? 1 : ins->component_count); 1072 for (i = 0; i < pres_op_info[ins->op].input_count; ++i) 1073 { 1074 TRACE(", "); 1075 dump_arg(rs, &ins->inputs[i], ins->scalar_op && !i ? 1 : ins->component_count); 1076 } 1077 TRACE("\n"); 1078 } 1079 1080 static void dump_preshader(struct d3dx_preshader *pres) 1081 { 1082 unsigned int i, immediate_count = pres->regs.table_sizes[PRES_REGTAB_IMMED] * 4; 1083 const double *immediates = pres->regs.tables[PRES_REGTAB_IMMED]; 1084 1085 if (immediate_count) 1086 TRACE("// Immediates:\n"); 1087 for (i = 0; i < immediate_count; ++i) 1088 { 1089 if (!(i % 4)) 1090 TRACE("// "); 1091 TRACE("%.8e", immediates[i]); 1092 if (i % 4 == 3) 1093 TRACE("\n"); 1094 else 1095 TRACE(", "); 1096 } 1097 TRACE("// Preshader registers:\n"); 1098 dump_registers(&pres->inputs); 1099 TRACE("preshader\n"); 1100 for (i = 0; i < pres->ins_count; ++i) 1101 dump_ins(&pres->regs, &pres->ins[i]); 1102 } 1103 1104 static HRESULT parse_preshader(struct d3dx_preshader *pres, unsigned int *ptr, unsigned int count, struct d3dx9_base_effect *base) 1105 { 1106 unsigned int *p; 1107 unsigned int i, j, const_count; 1108 double *dconst; 1109 HRESULT hr; 1110 unsigned int saved_word; 1111 unsigned int section_size; 1112 1113 TRACE("Preshader version %#x.\n", *ptr & 0xffff); 1114 1115 if (!count) 1116 { 1117 WARN("Unexpected end of byte code buffer.\n"); 1118 return D3DXERR_INVALIDDATA; 1119 } 1120 1121 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_CLIT, §ion_size); 1122 if (p) 1123 { 1124 const_count = *p++; 1125 if (const_count > (section_size - 1) / (sizeof(double) / sizeof(unsigned int))) 1126 { 1127 WARN("Byte code buffer ends unexpectedly.\n"); 1128 return D3DXERR_INVALIDDATA; 1129 } 1130 dconst = (double *)p; 1131 } 1132 else 1133 { 1134 const_count = 0; 1135 dconst = NULL; 1136 } 1137 TRACE("%u double constants.\n", const_count); 1138 1139 p = find_bytecode_comment(ptr + 1, count - 1, FOURCC_FXLC, §ion_size); 1140 if (!p) 1141 { 1142 WARN("Could not find preshader code.\n"); 1143 return D3D_OK; 1144 } 1145 pres->ins_count = *p++; 1146 --section_size; 1147 if (pres->ins_count > UINT_MAX / sizeof(*pres->ins)) 1148 { 1149 WARN("Invalid instruction count %u.\n", pres->ins_count); 1150 return D3DXERR_INVALIDDATA; 1151 } 1152 TRACE("%u instructions.\n", pres->ins_count); 1153 pres->ins = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*pres->ins) * pres->ins_count); 1154 if (!pres->ins) 1155 return E_OUTOFMEMORY; 1156 for (i = 0; i < pres->ins_count; ++i) 1157 { 1158 unsigned int *ptr_next; 1159 1160 ptr_next = parse_pres_ins(p, section_size, &pres->ins[i]); 1161 if (!ptr_next) 1162 return D3DXERR_INVALIDDATA; 1163 section_size -= ptr_next - p; 1164 p = ptr_next; 1165 } 1166 1167 pres->inputs.regset2table = pres_regset2table; 1168 1169 saved_word = *ptr; 1170 *ptr = 0xfffe0000; 1171 hr = get_constants_desc(ptr, &pres->inputs, base, NULL, 0, NULL); 1172 *ptr = saved_word; 1173 if (FAILED(hr)) 1174 return hr; 1175 1176 if (const_count % get_reg_components(PRES_REGTAB_IMMED)) 1177 { 1178 FIXME("const_count %u is not a multiple of %u.\n", const_count, 1179 get_reg_components(PRES_REGTAB_IMMED)); 1180 return D3DXERR_INVALIDDATA; 1181 } 1182 pres->regs.table_sizes[PRES_REGTAB_IMMED] = get_reg_offset(PRES_REGTAB_IMMED, const_count); 1183 1184 update_table_sizes_consts(pres->regs.table_sizes, &pres->inputs); 1185 for (i = 0; i < pres->ins_count; ++i) 1186 { 1187 for (j = 0; j < pres_op_info[pres->ins[i].op].input_count; ++j) 1188 { 1189 enum pres_reg_tables table; 1190 unsigned int reg_idx; 1191 1192 if (pres->ins[i].inputs[j].index_reg.table == PRES_REGTAB_COUNT) 1193 { 1194 unsigned int last_component_index = pres->ins[i].scalar_op && !j ? 0 1195 : pres->ins[i].component_count - 1; 1196 1197 table = pres->ins[i].inputs[j].reg.table; 1198 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].reg.offset 1199 + last_component_index); 1200 } 1201 else 1202 { 1203 table = pres->ins[i].inputs[j].index_reg.table; 1204 reg_idx = get_reg_offset(table, pres->ins[i].inputs[j].index_reg.offset); 1205 } 1206 if (reg_idx >= pres->regs.table_sizes[table]) 1207 { 1208 FIXME("Out of bounds register index, i %u, j %u, table %u, reg_idx %u.\n", 1209 i, j, table, reg_idx); 1210 return D3DXERR_INVALIDDATA; 1211 } 1212 } 1213 update_table_size(pres->regs.table_sizes, pres->ins[i].output.reg.table, 1214 get_reg_offset(pres->ins[i].output.reg.table, pres->ins[i].output.reg.offset)); 1215 } 1216 if (FAILED(regstore_alloc_table(&pres->regs, PRES_REGTAB_IMMED))) 1217 return E_OUTOFMEMORY; 1218 regstore_set_values(&pres->regs, PRES_REGTAB_IMMED, dconst, 0, const_count); 1219 1220 return D3D_OK; 1221 } 1222 1223 HRESULT d3dx_create_param_eval(struct d3dx9_base_effect *base_effect, void *byte_code, unsigned int byte_code_size, 1224 D3DXPARAMETER_TYPE type, struct d3dx_param_eval **peval_out, ULONG64 *version_counter, 1225 const char **skip_constants, unsigned int skip_constants_count) 1226 { 1227 struct d3dx_param_eval *peval; 1228 unsigned int *ptr, *shader_ptr = NULL; 1229 unsigned int i; 1230 BOOL shader; 1231 unsigned int count, pres_size; 1232 HRESULT ret; 1233 1234 TRACE("base_effect %p, byte_code %p, byte_code_size %u, type %u, peval_out %p.\n", 1235 base_effect, byte_code, byte_code_size, type, peval_out); 1236 1237 count = byte_code_size / sizeof(unsigned int); 1238 if (!byte_code || !count) 1239 { 1240 *peval_out = NULL; 1241 return D3D_OK; 1242 } 1243 1244 peval = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*peval)); 1245 if (!peval) 1246 { 1247 ret = E_OUTOFMEMORY; 1248 goto err_out; 1249 } 1250 peval->version_counter = version_counter; 1251 1252 peval->param_type = type; 1253 switch (type) 1254 { 1255 case D3DXPT_VERTEXSHADER: 1256 case D3DXPT_PIXELSHADER: 1257 shader = TRUE; 1258 break; 1259 default: 1260 shader = FALSE; 1261 break; 1262 } 1263 peval->shader_inputs.regset2table = shad_regset2table; 1264 1265 ptr = (unsigned int *)byte_code; 1266 if (shader) 1267 { 1268 if ((*ptr & 0xfffe0000) != 0xfffe0000) 1269 { 1270 FIXME("Invalid shader signature %#x.\n", *ptr); 1271 ret = D3DXERR_INVALIDDATA; 1272 goto err_out; 1273 } 1274 TRACE("Shader version %#x.\n", *ptr & 0xffff); 1275 shader_ptr = ptr; 1276 ptr = find_bytecode_comment(ptr + 1, count - 1, FOURCC_PRES, &pres_size); 1277 if (!ptr) 1278 TRACE("No preshader found.\n"); 1279 } 1280 else 1281 { 1282 pres_size = count; 1283 } 1284 1285 if (ptr && FAILED(ret = parse_preshader(&peval->pres, ptr, pres_size, base_effect))) 1286 { 1287 FIXME("Failed parsing preshader, byte code for analysis follows.\n"); 1288 dump_bytecode(byte_code, byte_code_size); 1289 goto err_out; 1290 } 1291 1292 if (shader) 1293 { 1294 if (FAILED(ret = get_constants_desc(shader_ptr, &peval->shader_inputs, base_effect, 1295 skip_constants, skip_constants_count, &peval->pres))) 1296 { 1297 TRACE("Could not get shader constant table, hr %#x.\n", ret); 1298 goto err_out; 1299 } 1300 update_table_sizes_consts(peval->pres.regs.table_sizes, &peval->shader_inputs); 1301 } 1302 1303 for (i = PRES_REGTAB_FIRST_SHADER; i < PRES_REGTAB_COUNT; ++i) 1304 { 1305 if (FAILED(ret = regstore_alloc_table(&peval->pres.regs, i))) 1306 goto err_out; 1307 } 1308 1309 if (TRACE_ON(d3dx)) 1310 { 1311 dump_bytecode(byte_code, byte_code_size); 1312 dump_preshader(&peval->pres); 1313 if (shader) 1314 { 1315 TRACE("// Shader registers:\n"); 1316 dump_registers(&peval->shader_inputs); 1317 } 1318 } 1319 *peval_out = peval; 1320 TRACE("Created parameter evaluator %p.\n", *peval_out); 1321 return D3D_OK; 1322 1323 err_out: 1324 WARN("Error creating parameter evaluator.\n"); 1325 if (TRACE_ON(d3dx)) 1326 dump_bytecode(byte_code, byte_code_size); 1327 1328 d3dx_free_param_eval(peval); 1329 *peval_out = NULL; 1330 return ret; 1331 } 1332 1333 static void d3dx_free_const_tab(struct d3dx_const_tab *ctab) 1334 { 1335 HeapFree(GetProcessHeap(), 0, ctab->inputs); 1336 HeapFree(GetProcessHeap(), 0, ctab->inputs_param); 1337 HeapFree(GetProcessHeap(), 0, ctab->const_set); 1338 } 1339 1340 static void d3dx_free_preshader(struct d3dx_preshader *pres) 1341 { 1342 HeapFree(GetProcessHeap(), 0, pres->ins); 1343 1344 regstore_free_tables(&pres->regs); 1345 d3dx_free_const_tab(&pres->inputs); 1346 } 1347 1348 void d3dx_free_param_eval(struct d3dx_param_eval *peval) 1349 { 1350 TRACE("peval %p.\n", peval); 1351 1352 if (!peval) 1353 return; 1354 1355 d3dx_free_preshader(&peval->pres); 1356 d3dx_free_const_tab(&peval->shader_inputs); 1357 HeapFree(GetProcessHeap(), 0, peval); 1358 } 1359 1360 static void pres_int_from_float(void *out, const void *in, unsigned int count) 1361 { 1362 unsigned int i; 1363 const float *in_float = in; 1364 int *out_int = out; 1365 1366 for (i = 0; i < count; ++i) 1367 out_int[i] = in_float[i]; 1368 } 1369 1370 static void pres_bool_from_value(void *out, const void *in, unsigned int count) 1371 { 1372 unsigned int i; 1373 const DWORD *in_dword = in; 1374 BOOL *out_bool = out; 1375 1376 for (i = 0; i < count; ++i) 1377 out_bool[i] = !!in_dword[i]; 1378 } 1379 1380 static void pres_float_from_int(void *out, const void *in, unsigned int count) 1381 { 1382 unsigned int i; 1383 const int *in_int = in; 1384 float *out_float = out; 1385 1386 for (i = 0; i < count; ++i) 1387 out_float[i] = in_int[i]; 1388 } 1389 1390 static void pres_float_from_bool(void *out, const void *in, unsigned int count) 1391 { 1392 unsigned int i; 1393 const BOOL *in_bool = in; 1394 float *out_float = out; 1395 1396 for (i = 0; i < count; ++i) 1397 out_float[i] = !!in_bool[i]; 1398 } 1399 1400 static void pres_int_from_bool(void *out, const void *in, unsigned int count) 1401 { 1402 unsigned int i; 1403 const float *in_bool = in; 1404 int *out_int = out; 1405 1406 for (i = 0; i < count; ++i) 1407 out_int[i] = !!in_bool[i]; 1408 } 1409 1410 static void regstore_set_data(struct d3dx_regstore *rs, unsigned int table, 1411 unsigned int offset, const unsigned int *in, unsigned int count, enum pres_value_type param_type) 1412 { 1413 typedef void (*conv_func)(void *out, const void *in, unsigned int count); 1414 static const conv_func set_const_funcs[PRES_VT_COUNT][PRES_VT_COUNT] = 1415 { 1416 {NULL, NULL, pres_int_from_float, pres_bool_from_value}, 1417 {NULL, NULL, NULL, NULL}, 1418 {pres_float_from_int, NULL, NULL, pres_bool_from_value}, 1419 {pres_float_from_bool, NULL, pres_int_from_bool, NULL} 1420 }; 1421 enum pres_value_type table_type = table_info[table].type; 1422 1423 if (param_type == table_type) 1424 { 1425 regstore_set_values(rs, table, in, offset, count); 1426 return; 1427 } 1428 1429 set_const_funcs[param_type][table_type]((unsigned int *)rs->tables[table] + offset, in, count); 1430 } 1431 1432 static HRESULT set_constants_device(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1433 D3DXPARAMETER_TYPE type, enum pres_reg_tables table, void *ptr, 1434 unsigned int start, unsigned int count) 1435 { 1436 if (type == D3DXPT_VERTEXSHADER) 1437 { 1438 switch(table) 1439 { 1440 case PRES_REGTAB_OCONST: 1441 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantF, start, ptr, count); 1442 case PRES_REGTAB_OICONST: 1443 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantI, start, ptr, count); 1444 case PRES_REGTAB_OBCONST: 1445 return SET_D3D_STATE_(manager, device, SetVertexShaderConstantB, start, ptr, count); 1446 default: 1447 FIXME("Unexpected register table %u.\n", table); 1448 return D3DERR_INVALIDCALL; 1449 } 1450 } 1451 else if (type == D3DXPT_PIXELSHADER) 1452 { 1453 switch(table) 1454 { 1455 case PRES_REGTAB_OCONST: 1456 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantF, start, ptr, count); 1457 case PRES_REGTAB_OICONST: 1458 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantI, start, ptr, count); 1459 case PRES_REGTAB_OBCONST: 1460 return SET_D3D_STATE_(manager, device, SetPixelShaderConstantB, start, ptr, count); 1461 default: 1462 FIXME("Unexpected register table %u.\n", table); 1463 return D3DERR_INVALIDCALL; 1464 } 1465 } 1466 else 1467 { 1468 FIXME("Unexpected parameter type %u.\n", type); 1469 return D3DERR_INVALIDCALL; 1470 } 1471 } 1472 1473 static HRESULT set_constants(struct d3dx_regstore *rs, struct d3dx_const_tab *const_tab, 1474 ULONG64 new_update_version, ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1475 D3DXPARAMETER_TYPE type, BOOL device_update_all, BOOL pres_dirty) 1476 { 1477 unsigned int const_idx; 1478 unsigned int current_start = 0, current_count = 0; 1479 enum pres_reg_tables current_table = PRES_REGTAB_COUNT; 1480 BOOL update_device = manager || device; 1481 HRESULT hr, result = D3D_OK; 1482 ULONG64 update_version = const_tab->update_version; 1483 1484 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1485 { 1486 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1487 enum pres_reg_tables table = const_set->table; 1488 struct d3dx_parameter *param = const_set->param; 1489 unsigned int element, i, j, start_offset; 1490 struct const_upload_info info; 1491 unsigned int *data; 1492 enum pres_value_type param_type; 1493 1494 if (!(param && is_param_dirty(param, update_version))) 1495 continue; 1496 1497 data = param->data; 1498 start_offset = get_offset_reg(table, const_set->register_index); 1499 if (const_set->direct_copy) 1500 { 1501 regstore_set_values(rs, table, data, start_offset, 1502 get_offset_reg(table, const_set->register_count)); 1503 continue; 1504 } 1505 param_type = table_type_from_param_type(param->type); 1506 if (const_set->constant_class == D3DXPC_SCALAR || const_set->constant_class == D3DXPC_VECTOR) 1507 { 1508 unsigned int count = max(param->rows, param->columns); 1509 1510 if (count >= get_reg_components(table)) 1511 { 1512 regstore_set_data(rs, table, start_offset, data, 1513 count * const_set->element_count, param_type); 1514 } 1515 else 1516 { 1517 for (element = 0; element < const_set->element_count; ++element) 1518 regstore_set_data(rs, table, start_offset + get_offset_reg(table, element), 1519 &data[element * count], count, param_type); 1520 } 1521 continue; 1522 } 1523 get_const_upload_info(const_set, &info); 1524 for (element = 0; element < const_set->element_count; ++element) 1525 { 1526 unsigned int *out = (unsigned int *)rs->tables[table] + start_offset; 1527 1528 /* Store reshaped but (possibly) not converted yet data temporarily in the same constants buffer. 1529 * All the supported types of parameters and table values have the same size. */ 1530 if (info.transpose) 1531 { 1532 for (i = 0; i < info.major_count; ++i) 1533 for (j = 0; j < info.minor; ++j) 1534 out[i * info.major_stride + j] = data[i + j * info.major]; 1535 1536 for (j = 0; j < info.minor_remainder; ++j) 1537 out[i * info.major_stride + j] = data[i + j * info.major]; 1538 } 1539 else 1540 { 1541 for (i = 0; i < info.major_count; ++i) 1542 for (j = 0; j < info.minor; ++j) 1543 out[i * info.major_stride + j] = data[i * info.minor + j]; 1544 } 1545 start_offset += get_offset_reg(table, const_set->register_count); 1546 data += param->rows * param->columns; 1547 } 1548 start_offset = get_offset_reg(table, const_set->register_index); 1549 if (table_info[table].type != param_type) 1550 regstore_set_data(rs, table, start_offset, (unsigned int *)rs->tables[table] + start_offset, 1551 get_offset_reg(table, const_set->register_count) * const_set->element_count, param_type); 1552 } 1553 const_tab->update_version = new_update_version; 1554 if (!update_device) 1555 return D3D_OK; 1556 1557 for (const_idx = 0; const_idx < const_tab->const_set_count; ++const_idx) 1558 { 1559 struct d3dx_const_param_eval_output *const_set = &const_tab->const_set[const_idx]; 1560 1561 if (device_update_all || (const_set->param 1562 ? is_param_dirty(const_set->param, update_version) : pres_dirty)) 1563 { 1564 enum pres_reg_tables table = const_set->table; 1565 1566 if (table == current_table && current_start + current_count == const_set->register_index) 1567 { 1568 current_count += const_set->register_count * const_set->element_count; 1569 } 1570 else 1571 { 1572 if (current_count) 1573 { 1574 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1575 (DWORD *)rs->tables[current_table] 1576 + get_offset_reg(current_table, current_start), current_start, current_count))) 1577 result = hr; 1578 } 1579 current_table = table; 1580 current_start = const_set->register_index; 1581 current_count = const_set->register_count * const_set->element_count; 1582 } 1583 } 1584 } 1585 if (current_count) 1586 { 1587 if (FAILED(hr = set_constants_device(manager, device, type, current_table, 1588 (DWORD *)rs->tables[current_table] 1589 + get_offset_reg(current_table, current_start), current_start, current_count))) 1590 result = hr; 1591 } 1592 return result; 1593 } 1594 1595 static double exec_get_reg_value(struct d3dx_regstore *rs, enum pres_reg_tables table, unsigned int offset) 1596 { 1597 return regstore_get_double(rs, table, offset); 1598 } 1599 1600 static double exec_get_arg(struct d3dx_regstore *rs, const struct d3dx_pres_operand *opr, unsigned int comp) 1601 { 1602 unsigned int offset, base_index, reg_index, table; 1603 1604 table = opr->reg.table; 1605 1606 if (opr->index_reg.table == PRES_REGTAB_COUNT) 1607 base_index = 0; 1608 else 1609 base_index = lrint(exec_get_reg_value(rs, opr->index_reg.table, opr->index_reg.offset)); 1610 1611 offset = get_offset_reg(table, base_index) + opr->reg.offset + comp; 1612 reg_index = get_reg_offset(table, offset); 1613 1614 if (reg_index >= rs->table_sizes[table]) 1615 { 1616 unsigned int wrap_size; 1617 1618 if (table == PRES_REGTAB_CONST) 1619 { 1620 /* As it can be guessed from tests, offset into floating constant table is wrapped 1621 * to the nearest power of 2 and not to the actual table size. */ 1622 for (wrap_size = 1; wrap_size < rs->table_sizes[table]; wrap_size <<= 1) 1623 ; 1624 } 1625 else 1626 { 1627 wrap_size = rs->table_sizes[table]; 1628 } 1629 WARN("Wrapping register index %u, table %u, wrap_size %u, table size %u.\n", 1630 reg_index, table, wrap_size, rs->table_sizes[table]); 1631 reg_index %= wrap_size; 1632 1633 if (reg_index >= rs->table_sizes[table]) 1634 return 0.0; 1635 1636 offset = get_offset_reg(table, reg_index) + offset % get_reg_components(table); 1637 } 1638 1639 return exec_get_reg_value(rs, table, offset); 1640 } 1641 1642 static void exec_set_arg(struct d3dx_regstore *rs, const struct d3dx_pres_reg *reg, 1643 unsigned int comp, double res) 1644 { 1645 regstore_set_double(rs, reg->table, reg->offset + comp, res); 1646 } 1647 1648 #define ARGS_ARRAY_SIZE 8 1649 static HRESULT execute_preshader(struct d3dx_preshader *pres) 1650 { 1651 unsigned int i, j, k; 1652 double args[ARGS_ARRAY_SIZE]; 1653 double res; 1654 1655 for (i = 0; i < pres->ins_count; ++i) 1656 { 1657 const struct d3dx_pres_ins *ins; 1658 const struct op_info *oi; 1659 1660 ins = &pres->ins[i]; 1661 oi = &pres_op_info[ins->op]; 1662 if (oi->func_all_comps) 1663 { 1664 if (oi->input_count * ins->component_count > ARGS_ARRAY_SIZE) 1665 { 1666 FIXME("Too many arguments (%u) for one instruction.\n", oi->input_count * ins->component_count); 1667 return E_FAIL; 1668 } 1669 for (k = 0; k < oi->input_count; ++k) 1670 for (j = 0; j < ins->component_count; ++j) 1671 args[k * ins->component_count + j] = exec_get_arg(&pres->regs, &ins->inputs[k], 1672 ins->scalar_op && !k ? 0 : j); 1673 res = oi->func(args, ins->component_count); 1674 1675 /* only 'dot' instruction currently falls here */ 1676 exec_set_arg(&pres->regs, &ins->output.reg, 0, res); 1677 } 1678 else 1679 { 1680 for (j = 0; j < ins->component_count; ++j) 1681 { 1682 for (k = 0; k < oi->input_count; ++k) 1683 args[k] = exec_get_arg(&pres->regs, &ins->inputs[k], ins->scalar_op && !k ? 0 : j); 1684 res = oi->func(args, ins->component_count); 1685 exec_set_arg(&pres->regs, &ins->output.reg, j, res); 1686 } 1687 } 1688 } 1689 return D3D_OK; 1690 } 1691 1692 static BOOL is_const_tab_input_dirty(struct d3dx_const_tab *ctab, ULONG64 update_version) 1693 { 1694 unsigned int i; 1695 1696 if (update_version == ULONG64_MAX) 1697 update_version = ctab->update_version; 1698 for (i = 0; i < ctab->input_count; ++i) 1699 { 1700 if (is_top_level_param_dirty(top_level_parameter_from_parameter(ctab->inputs_param[i]), 1701 update_version)) 1702 return TRUE; 1703 } 1704 return FALSE; 1705 } 1706 1707 BOOL is_param_eval_input_dirty(struct d3dx_param_eval *peval, ULONG64 update_version) 1708 { 1709 return is_const_tab_input_dirty(&peval->pres.inputs, update_version) 1710 || is_const_tab_input_dirty(&peval->shader_inputs, update_version); 1711 } 1712 1713 HRESULT d3dx_evaluate_parameter(struct d3dx_param_eval *peval, const struct d3dx_parameter *param, 1714 void *param_value) 1715 { 1716 HRESULT hr; 1717 unsigned int i; 1718 unsigned int elements, elements_param, elements_table; 1719 float *oc; 1720 1721 TRACE("peval %p, param %p, param_value %p.\n", peval, param, param_value); 1722 1723 if (is_const_tab_input_dirty(&peval->pres.inputs, ULONG64_MAX)) 1724 { 1725 set_constants(&peval->pres.regs, &peval->pres.inputs, 1726 next_update_version(peval->version_counter), 1727 NULL, NULL, peval->param_type, FALSE, FALSE); 1728 1729 if (FAILED(hr = execute_preshader(&peval->pres))) 1730 return hr; 1731 } 1732 1733 elements_table = get_offset_reg(PRES_REGTAB_OCONST, peval->pres.regs.table_sizes[PRES_REGTAB_OCONST]); 1734 elements_param = param->bytes / sizeof(unsigned int); 1735 elements = min(elements_table, elements_param); 1736 oc = (float *)peval->pres.regs.tables[PRES_REGTAB_OCONST]; 1737 for (i = 0; i < elements; ++i) 1738 set_number((unsigned int *)param_value + i, param->type, oc + i, D3DXPT_FLOAT); 1739 return D3D_OK; 1740 } 1741 1742 HRESULT d3dx_param_eval_set_shader_constants(ID3DXEffectStateManager *manager, struct IDirect3DDevice9 *device, 1743 struct d3dx_param_eval *peval, BOOL update_all) 1744 { 1745 HRESULT hr; 1746 struct d3dx_preshader *pres = &peval->pres; 1747 struct d3dx_regstore *rs = &pres->regs; 1748 ULONG64 new_update_version = next_update_version(peval->version_counter); 1749 BOOL pres_dirty = FALSE; 1750 1751 TRACE("device %p, peval %p, param_type %u.\n", device, peval, peval->param_type); 1752 1753 if (is_const_tab_input_dirty(&pres->inputs, ULONG64_MAX)) 1754 { 1755 set_constants(rs, &pres->inputs, new_update_version, 1756 NULL, NULL, peval->param_type, FALSE, FALSE); 1757 if (FAILED(hr = execute_preshader(pres))) 1758 return hr; 1759 pres_dirty = TRUE; 1760 } 1761 1762 return set_constants(rs, &peval->shader_inputs, new_update_version, 1763 manager, device, peval->param_type, update_all, pres_dirty); 1764 } 1765