1 /* 2 * Copyright 2011-2019 Branimir Karadzic. All rights reserved. 3 * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause 4 */ 5 6 #include "bgfx_p.h" 7 #include "shader_dxbc.h" 8 9 namespace bgfx 10 { 11 struct DxbcOpcodeInfo 12 { 13 uint8_t numOperands; 14 uint8_t numValues; 15 }; 16 17 static const DxbcOpcodeInfo s_dxbcOpcodeInfo[] = 18 { 19 { 3, 0 }, // ADD 20 { 3, 0 }, // AND 21 { 0, 0 }, // BREAK 22 { 1, 0 }, // BREAKC 23 { 0, 0 }, // CALL 24 { 0, 0 }, // CALLC 25 { 1, 0 }, // CASE 26 { 0, 0 }, // CONTINUE 27 { 1, 0 }, // CONTINUEC 28 { 0, 0 }, // CUT 29 { 0, 0 }, // DEFAULT 30 { 2, 0 }, // DERIV_RTX 31 { 2, 0 }, // DERIV_RTY 32 { 1, 0 }, // DISCARD 33 { 3, 0 }, // DIV 34 { 3, 0 }, // DP2 35 { 3, 0 }, // DP3 36 { 3, 0 }, // DP4 37 { 0, 0 }, // ELSE 38 { 0, 0 }, // EMIT 39 { 0, 0 }, // EMITTHENCUT 40 { 0, 0 }, // ENDIF 41 { 0, 0 }, // ENDLOOP 42 { 0, 0 }, // ENDSWITCH 43 { 3, 0 }, // EQ 44 { 2, 0 }, // EXP 45 { 2, 0 }, // FRC 46 { 2, 0 }, // FTOI 47 { 2, 0 }, // FTOU 48 { 3, 0 }, // GE 49 { 3, 0 }, // IADD 50 { 1, 0 }, // IF 51 { 3, 0 }, // IEQ 52 { 3, 0 }, // IGE 53 { 3, 0 }, // ILT 54 { 4, 0 }, // IMAD 55 { 3, 0 }, // IMAX 56 { 3, 0 }, // IMIN 57 { 4, 0 }, // IMUL 58 { 3, 0 }, // INE 59 { 2, 0 }, // INEG 60 { 3, 0 }, // ISHL 61 { 3, 0 }, // ISHR 62 { 2, 0 }, // ITOF 63 { 0, 0 }, // LABEL 64 { 3, 0 }, // LD 65 { 4, 0 }, // LD_MS 66 { 2, 0 }, // LOG 67 { 0, 0 }, // LOOP 68 { 3, 0 }, // LT 69 { 4, 0 }, // MAD 70 { 3, 0 }, // MIN 71 { 3, 0 }, // MAX 72 { 0, 1 }, // CUSTOMDATA 73 { 2, 0 }, // MOV 74 { 4, 0 }, // MOVC 75 { 3, 0 }, // MUL 76 { 3, 0 }, // NE 77 { 0, 0 }, // NOP 78 { 2, 0 }, // NOT 79 { 3, 0 }, // OR 80 { 3, 0 }, // RESINFO 81 { 0, 0 }, // RET 82 { 1, 0 }, // RETC 83 { 2, 0 }, // ROUND_NE 84 { 2, 0 }, // ROUND_NI 85 { 2, 0 }, // ROUND_PI 86 { 2, 0 }, // ROUND_Z 87 { 2, 0 }, // RSQ 88 { 4, 0 }, // SAMPLE 89 { 5, 0 }, // SAMPLE_C 90 { 5, 0 }, // SAMPLE_C_LZ 91 { 5, 0 }, // SAMPLE_L 92 { 6, 0 }, // SAMPLE_D 93 { 5, 0 }, // SAMPLE_B 94 { 
2, 0 }, // SQRT 95 { 1, 0 }, // SWITCH 96 { 3, 0 }, // SINCOS 97 { 4, 0 }, // UDIV 98 { 3, 0 }, // ULT 99 { 3, 0 }, // UGE 100 { 4, 0 }, // UMUL 101 { 4, 0 }, // UMAD 102 { 3, 0 }, // UMAX 103 { 3, 0 }, // UMIN 104 { 3, 0 }, // USHR 105 { 2, 0 }, // UTOF 106 { 3, 0 }, // XOR 107 { 1, 1 }, // DCL_RESOURCE 108 { 1, 0 }, // DCL_CONSTANT_BUFFER 109 { 1, 0 }, // DCL_SAMPLER 110 { 1, 1 }, // DCL_INDEX_RANGE 111 { 1, 0 }, // DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY 112 { 1, 0 }, // DCL_GS_INPUT_PRIMITIVE 113 { 0, 1 }, // DCL_MAX_OUTPUT_VERTEX_COUNT 114 { 1, 0 }, // DCL_INPUT 115 { 1, 1 }, // DCL_INPUT_SGV 116 { 1, 0 }, // DCL_INPUT_SIV 117 { 1, 0 }, // DCL_INPUT_PS 118 { 1, 1 }, // DCL_INPUT_PS_SGV 119 { 1, 1 }, // DCL_INPUT_PS_SIV 120 { 1, 0 }, // DCL_OUTPUT 121 { 1, 0 }, // DCL_OUTPUT_SGV 122 { 1, 1 }, // DCL_OUTPUT_SIV 123 { 0, 1 }, // DCL_TEMPS 124 { 0, 3 }, // DCL_INDEXABLE_TEMP 125 { 0, 0 }, // DCL_GLOBAL_FLAGS 126 127 { 0, 0 }, // InstrD3D10 128 { 4, 0 }, // LOD 129 { 4, 0 }, // GATHER4 130 { 0, 0 }, // SAMPLE_POS 131 { 0, 0 }, // SAMPLE_INFO 132 133 { 0, 0 }, // InstrD3D10_1 134 { 0, 0 }, // HS_DECLS 135 { 0, 0 }, // HS_CONTROL_POINT_PHASE 136 { 0, 0 }, // HS_FORK_PHASE 137 { 0, 0 }, // HS_JOIN_PHASE 138 { 0, 0 }, // EMIT_STREAM 139 { 0, 0 }, // CUT_STREAM 140 { 1, 0 }, // EMITTHENCUT_STREAM 141 { 1, 0 }, // INTERFACE_CALL 142 { 0, 0 }, // BUFINFO 143 { 2, 0 }, // DERIV_RTX_COARSE 144 { 2, 0 }, // DERIV_RTX_FINE 145 { 2, 0 }, // DERIV_RTY_COARSE 146 { 2, 0 }, // DERIV_RTY_FINE 147 { 5, 0 }, // GATHER4_C 148 { 5, 0 }, // GATHER4_PO 149 { 0, 0 }, // GATHER4_PO_C 150 { 2, 0 }, // RCP 151 { 0, 0 }, // F32TOF16 152 { 0, 0 }, // F16TOF32 153 { 0, 0 }, // UADDC 154 { 0, 0 }, // USUBB 155 { 0, 0 }, // COUNTBITS 156 { 0, 0 }, // FIRSTBIT_HI 157 { 0, 0 }, // FIRSTBIT_LO 158 { 0, 0 }, // FIRSTBIT_SHI 159 { 4, 0 }, // UBFE 160 { 4, 0 }, // IBFE 161 { 5, 0 }, // BFI 162 { 0, 0 }, // BFREV 163 { 5, 0 }, // SWAPC 164 { 0, 0 }, // DCL_STREAM 165 { 1, 0 }, // DCL_FUNCTION_BODY 166 { 0, 0 
}, // DCL_FUNCTION_TABLE 167 { 0, 0 }, // DCL_INTERFACE 168 { 0, 0 }, // DCL_INPUT_CONTROL_POINT_COUNT 169 { 0, 0 }, // DCL_OUTPUT_CONTROL_POINT_COUNT 170 { 0, 0 }, // DCL_TESS_DOMAIN 171 { 0, 0 }, // DCL_TESS_PARTITIONING 172 { 0, 0 }, // DCL_TESS_OUTPUT_PRIMITIVE 173 { 0, 0 }, // DCL_HS_MAX_TESSFACTOR 174 { 0, 0 }, // DCL_HS_FORK_PHASE_INSTANCE_COUNT 175 { 0, 0 }, // DCL_HS_JOIN_PHASE_INSTANCE_COUNT 176 { 0, 3 }, // DCL_THREAD_GROUP 177 { 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_TYPED 178 { 1, 0 }, // DCL_UNORDERED_ACCESS_VIEW_RAW 179 { 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_STRUCTURED 180 { 1, 1 }, // DCL_THREAD_GROUP_SHARED_MEMORY_RAW 181 { 1, 2 }, // DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED 182 { 1, 0 }, // DCL_RESOURCE_RAW 183 { 1, 1 }, // DCL_RESOURCE_STRUCTURED 184 { 3, 0 }, // LD_UAV_TYPED 185 { 3, 0 }, // STORE_UAV_TYPED 186 { 3, 0 }, // LD_RAW 187 { 3, 0 }, // STORE_RAW 188 { 4, 0 }, // LD_STRUCTURED 189 { 4, 0 }, // STORE_STRUCTURED 190 { 3, 0 }, // ATOMIC_AND 191 { 3, 0 }, // ATOMIC_OR 192 { 3, 0 }, // ATOMIC_XOR 193 { 3, 0 }, // ATOMIC_CMP_STORE 194 { 3, 0 }, // ATOMIC_IADD 195 { 3, 0 }, // ATOMIC_IMAX 196 { 3, 0 }, // ATOMIC_IMIN 197 { 3, 0 }, // ATOMIC_UMAX 198 { 3, 0 }, // ATOMIC_UMIN 199 { 2, 0 }, // IMM_ATOMIC_ALLOC 200 { 2, 0 }, // IMM_ATOMIC_CONSUME 201 { 0, 0 }, // IMM_ATOMIC_IADD 202 { 0, 0 }, // IMM_ATOMIC_AND 203 { 0, 0 }, // IMM_ATOMIC_OR 204 { 0, 0 }, // IMM_ATOMIC_XOR 205 { 0, 0 }, // IMM_ATOMIC_EXCH 206 { 0, 0 }, // IMM_ATOMIC_CMP_EXCH 207 { 0, 0 }, // IMM_ATOMIC_IMAX 208 { 0, 0 }, // IMM_ATOMIC_IMIN 209 { 0, 0 }, // IMM_ATOMIC_UMAX 210 { 0, 0 }, // IMM_ATOMIC_UMIN 211 { 0, 0 }, // SYNC 212 { 3, 0 }, // DADD 213 { 3, 0 }, // DMAX 214 { 3, 0 }, // DMIN 215 { 3, 0 }, // DMUL 216 { 3, 0 }, // DEQ 217 { 3, 0 }, // DGE 218 { 3, 0 }, // DLT 219 { 3, 0 }, // DNE 220 { 2, 0 }, // DMOV 221 { 4, 0 }, // DMOVC 222 { 0, 0 }, // DTOF 223 { 0, 0 }, // FTOD 224 { 3, 0 }, // EVAL_SNAPPED 225 { 3, 0 }, // EVAL_SAMPLE_INDEX 226 { 2, 0 }, // EVAL_CENTROID 227 
{ 0, 1 }, // DCL_GS_INSTANCE_COUNT 228 { 0, 0 }, // ABORT 229 { 0, 0 }, // DEBUG_BREAK 230 231 { 0, 0 }, // InstrD3D11 232 { 0, 0 }, // DDIV 233 { 0, 0 }, // DFMA 234 { 0, 0 }, // DRCP 235 { 0, 0 }, // MSAD 236 { 0, 0 }, // DTOI 237 { 0, 0 }, // DTOU 238 { 0, 0 }, // ITOD 239 { 0, 0 }, // UTOD 240 }; 241 BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcodeInfo) == DxbcOpcode::Count); 242 243 static const char* s_dxbcOpcode[] = 244 { 245 "add", 246 "and", 247 "break", 248 "breakc", 249 "call", 250 "callc", 251 "case", 252 "continue", 253 "continuec", 254 "cut", 255 "default", 256 "deriv_rtx", 257 "deriv_rty", 258 "discard", 259 "div", 260 "dp2", 261 "dp3", 262 "dp4", 263 "else", 264 "emit", 265 "emitthencut", 266 "endif", 267 "endloop", 268 "endswitch", 269 "eq", 270 "exp", 271 "frc", 272 "ftoi", 273 "ftou", 274 "ge", 275 "iadd", 276 "if", 277 "ieq", 278 "ige", 279 "ilt", 280 "imad", 281 "imax", 282 "imin", 283 "imul", 284 "ine", 285 "ineg", 286 "ishl", 287 "ishr", 288 "itof", 289 "label", 290 "ld", 291 "ld_ms", 292 "log", 293 "loop", 294 "lt", 295 "mad", 296 "min", 297 "max", 298 "customdata", 299 "mov", 300 "movc", 301 "mul", 302 "ne", 303 "nop", 304 "not", 305 "or", 306 "resinfo", 307 "ret", 308 "retc", 309 "round_ne", 310 "round_ni", 311 "round_pi", 312 "round_z", 313 "rsq", 314 "sample", 315 "sample_c", 316 "sample_c_lz", 317 "sample_l", 318 "sample_d", 319 "sample_b", 320 "sqrt", 321 "switch", 322 "sincos", 323 "udiv", 324 "ult", 325 "uge", 326 "umul", 327 "umad", 328 "umax", 329 "umin", 330 "ushr", 331 "utof", 332 "xor", 333 "dcl_resource", 334 "dcl_constantbuffer", 335 "dcl_sampler", 336 "dcl_index_range", 337 "dcl_gs_output_primitive_topology", 338 "dcl_gs_input_primitive", 339 "dcl_max_output_vertex_count", 340 "dcl_input", 341 "dcl_input_sgv", 342 "dcl_input_siv", 343 "dcl_input_ps", 344 "dcl_input_ps_sgv", 345 "dcl_input_ps_siv", 346 "dcl_output", 347 "dcl_output_sgv", 348 "dcl_output_siv", 349 "dcl_temps", 350 "dcl_indexable_temp", 351 "dcl_global_flags", 352 353 
NULL, 354 "lod", 355 "gather4", 356 "sample_pos", 357 "sample_info", 358 359 NULL, 360 "hs_decls", 361 "hs_control_point_phase", 362 "hs_fork_phase", 363 "hs_join_phase", 364 "emit_stream", 365 "cut_stream", 366 "emitthencut_stream", 367 "interface_call", 368 "bufinfo", 369 "deriv_rtx_coarse", 370 "deriv_rtx_fine", 371 "deriv_rty_coarse", 372 "deriv_rty_fine", 373 "gather4_c", 374 "gather4_po", 375 "gather4_po_c", 376 "rcp", 377 "f32tof16", 378 "f16tof32", 379 "uaddc", 380 "usubb", 381 "countbits", 382 "firstbit_hi", 383 "firstbit_lo", 384 "firstbit_shi", 385 "ubfe", 386 "ibfe", 387 "bfi", 388 "bfrev", 389 "swapc", 390 "dcl_stream", 391 "dcl_function_body", 392 "dcl_function_table", 393 "dcl_interface", 394 "dcl_input_control_point_count", 395 "dcl_output_control_point_count", 396 "dcl_tess_domain", 397 "dcl_tess_partitioning", 398 "dcl_tess_output_primitive", 399 "dcl_hs_max_tessfactor", 400 "dcl_hs_fork_phase_instance_count", 401 "dcl_hs_join_phase_instance_count", 402 "dcl_thread_group", 403 "dcl_unordered_access_view_typed", 404 "dcl_unordered_access_view_raw", 405 "dcl_unordered_access_view_structured", 406 "dcl_thread_group_shared_memory_raw", 407 "dcl_thread_group_shared_memory_structured", 408 "dcl_resource_raw", 409 "dcl_resource_structured", 410 "ld_uav_typed", 411 "store_uav_typed", 412 "ld_raw", 413 "store_raw", 414 "ld_structured", 415 "store_structured", 416 "atomic_and", 417 "atomic_or", 418 "atomic_xor", 419 "atomic_cmp_store", 420 "atomic_iadd", 421 "atomic_imax", 422 "atomic_imin", 423 "atomic_umax", 424 "atomic_umin", 425 "imm_atomic_alloc", 426 "imm_atomic_consume", 427 "imm_atomic_iadd", 428 "imm_atomic_and", 429 "imm_atomic_or", 430 "imm_atomic_xor", 431 "imm_atomic_exch", 432 "imm_atomic_cmp_exch", 433 "imm_atomic_imax", 434 "imm_atomic_imin", 435 "imm_atomic_umax", 436 "imm_atomic_umin", 437 "sync", 438 "dadd", 439 "dmax", 440 "dmin", 441 "dmul", 442 "deq", 443 "dge", 444 "dlt", 445 "dne", 446 "dmov", 447 "dmovc", 448 "dtof", 449 "ftod", 450 
"eval_snapped", 451 "eval_sample_index", 452 "eval_centroid", 453 "dcl_gs_instance_count", 454 "abort", 455 "debug_break", 456 457 NULL, 458 "ddiv", 459 "dfma", 460 "drcp", 461 "msad", 462 "dtoi", 463 "dtou", 464 "itod", 465 "utod", 466 }; 467 BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcode) == DxbcOpcode::Count); 468 getName(DxbcOpcode::Enum _opcode)469 const char* getName(DxbcOpcode::Enum _opcode) 470 { 471 BX_CHECK(_opcode < DxbcOpcode::Count, "Unknown opcode id %d.", _opcode); 472 return s_dxbcOpcode[_opcode]; 473 } 474 475 static const char* s_dxbcSrvType[] = 476 { 477 "", // Unknown 478 "Buffer", // Buffer 479 "Texture1D", // Texture1D 480 "Texture2D", // Texture2D 481 "Texture2DMS", // Texture2DMS 482 "Texture3D", // Texture3D 483 "TextureCube", // TextureCube 484 "Texture1DArray", // Texture1DArray 485 "Texture2DArray", // Texture2DArray 486 "Texture2DMSArray", // Texture2DMSArray 487 "TextureCubearray", // TextureCubearray 488 "RawBuffer", // RawBuffer 489 "StructuredBuffer", // StructuredBuffer 490 }; 491 BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcSrvType) == DxbcResourceDim::Count); 492 493 const char* s_dxbcInterpolationName[] = 494 { 495 "", 496 "constant", 497 "linear", 498 "linear centroid", 499 "linear noperspective", 500 "linear noperspective centroid", 501 "linear sample", 502 "linear noperspective sample", 503 }; 504 BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcInterpolationName) == DxbcInterpolation::Count); 505 506 // mesa/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/shortfiles.txt 507 static const char* s_dxbcOperandType[] = 508 { 509 "r", // Temp 510 "v", // Input 511 "o", // Output 512 "x", // TempArray 513 "l", // Imm32 514 "d", // Imm64 515 "s", // Sampler 516 "t", // Resource 517 "cb", // ConstantBuffer 518 "icb", // ImmConstantBuffer 519 "label", // Label 520 "vPrim", // PrimitiveID 521 "oDepth", // OutputDepth 522 "null", // Null 523 "rasterizer", // Rasterizer 524 "oMask", // CoverageMask 525 "stream", // Stream 526 "function_body", // FunctionBody 527 
"function_table", // FunctionTable 528 "interface", // Interface 529 "function_input", // FunctionInput 530 "function_output", // FunctionOutput 531 "vOutputControlPointID", // OutputControlPointId 532 "vForkInstanceID", // InputForkInstanceId 533 "vJoinInstanceID", // InputJoinInstanceId 534 "vicp", // InputControlPoint 535 "vocp", // OutputControlPoint 536 "vpc", // InputPatchConstant 537 "vDomain", // InputDomainPoint 538 "this", // ThisPointer 539 "u", // UnorderedAccessView 540 "g", // ThreadGroupSharedMemory 541 "vThreadID", // InputThreadId 542 "vThreadGrouID", // InputThreadGroupId 543 "vThreadIDInGroup", // InputThreadIdInGroup 544 "vCoverage", // InputCoverageMask 545 "vThreadIDInGroupFlattened", // InputThreadIdInGroupFlattened 546 "vGSInstanceID", // InputGsInstanceId 547 "oDepthGE", // OutputDepthGreaterEqual 548 "oDepthLE", // OutputDepthLessEqual 549 "vCycleCounter", // CycleCounter 550 }; 551 BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOperandType) == DxbcOperandType::Count); 552 553 static const char* s_dxbcCustomDataClass[] = 554 { 555 "Comment", 556 "DebugInfo", 557 "Opaque", 558 "dcl_immediateConstantBuffer", 559 "ShaderMessage", 560 "ClipPlaneConstantMappingsForDx9", 561 }; 562 BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcCustomDataClass) == DxbcCustomDataClass::Count); 563 564 #define DXBC_MAX_NAME_STRING 512 565 readString(bx::ReaderSeekerI * _reader,int64_t _offset,char * _out,uint32_t _max,bx::Error * _err)566 int32_t readString(bx::ReaderSeekerI* _reader, int64_t _offset, char* _out, uint32_t _max, bx::Error* _err) 567 { 568 int64_t oldOffset = bx::seek(_reader); 569 bx::seek(_reader, _offset, bx::Whence::Begin); 570 571 int32_t size = 0; 572 573 for (uint32_t ii = 0; ii < _max-1; ++ii) 574 { 575 char ch; 576 size += bx::read(_reader, ch, _err); 577 *_out++ = ch; 578 579 if ('\0' == ch) 580 { 581 break; 582 } 583 } 584 *_out = '\0'; 585 586 bx::seek(_reader, oldOffset, bx::Whence::Begin); 587 588 return size; 589 } 590 dxbcMixF(uint32_t _b,uint32_t _c,uint32_t 
// Bitwise select: takes bits of _c where _b is set, bits of _d elsewhere.
inline uint32_t dxbcMixF(uint32_t _b, uint32_t _c, uint32_t _d)
{
	return _d ^ (_b & (_c ^ _d) );
}

// Same select as dxbcMixF with the roles rotated: _d chooses between _b and _c.
inline uint32_t dxbcMixG(uint32_t _b, uint32_t _c, uint32_t _d)
{
	return _c ^ (_d & (_b ^ _c) );
}

// Parity of the three inputs.
inline uint32_t dxbcMixH(uint32_t _b, uint32_t _c, uint32_t _d)
{
	return _d ^ (_b ^ _c);
}

// _c xor (_b or-complement _d).
inline uint32_t dxbcMixI(uint32_t _b, uint32_t _c, uint32_t _d)
{
	return _c ^ (_b | ~_d);
}

// Folds one 64-byte block (`data`, 16 words) into the 4-word running state
// `hash`. `data` is only read; `hash` is read up front and updated once at
// the end.
//
// The 64 steps are grouped in four rounds of 16. Each round uses its own mix
// function, its own walk through the 16 input words and its own cycle of four
// rotation amounts.
void dxbcHashBlock(const uint32_t* data, uint32_t* hash)
{
	// Additive constant of every step, in execution order.
	static const uint32_t s_addend[64] =
	{
		0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
		0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
		0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
		0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,

		0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
		0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
		0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
		0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,

		0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
		0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
		0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
		0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,

		0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
		0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
		0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
		0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
	};

	// Left-rotation amounts: [round][step % 4]. A right-rotation by n is
	// written here as the equivalent left-rotation by 32-n.
	static const uint8_t s_rotateLeft[4][4] =
	{
		{ 7, 12, 17, 22 },
		{ 5,  9, 14, 20 },
		{ 4, 11, 16, 23 },
		{ 6, 10, 15, 21 },
	};

	uint32_t wa = hash[0];
	uint32_t wb = hash[1];
	uint32_t wc = hash[2];
	uint32_t wd = hash[3];

	for (uint32_t step = 0; step < 64; ++step)
	{
		const uint32_t round = step / 16;

		uint32_t mixed;
		uint32_t word;
		switch (round)
		{
		case 0:
			mixed = dxbcMixF(wb, wc, wd);
			word  = step;
			break;

		case 1:
			mixed = dxbcMixG(wb, wc, wd);
			word  = (5*step + 1) % 16;
			break;

		case 2:
			mixed = dxbcMixH(wb, wc, wd);
			word  = (3*step + 5) % 16;
			break;

		default:
			mixed = dxbcMixI(wb, wc, wd);
			word  = (7*step) % 16;
			break;
		}

		const uint32_t sum     = wa + mixed + data[word] + s_addend[step];
		const uint32_t sa      = s_rotateLeft[round][step % 4];
		const uint32_t rotated = (sum << sa) | (sum >> (32 - sa) );
		const uint32_t next    = wb + rotated;

		// Shift the working registers by one place; after every fourth
		// step they are back in their original positions.
		wa = wd;
		wd = wc;
		wc = wb;
		wb = next;
	}

	hash[0] += wa;
	hash[1] += wb;
	hash[2] += wc;
	hash[3] += wd;
}

// dxbc hash function is slightly modified version of MD5 hash.
// https://web.archive.org/web/20190207230524/https://tools.ietf.org/html/rfc1321
// https://web.archive.org/web/20190207230538/http://www.efgh.com/software/md5.txt
//
// Assumption is that data pointer, size are both 4-byte aligned,
// and little endian.
725 // dxbcHash(const void * _data,uint32_t _size,void * _digest)726 void dxbcHash(const void* _data, uint32_t _size, void* _digest) 727 { 728 uint32_t hash[4] = 729 { 730 0x67452301, 731 0xefcdab89, 732 0x98badcfe, 733 0x10325476, 734 }; 735 736 const uint32_t* data = (const uint32_t*)_data; 737 for (uint32_t ii = 0, num = _size/64; ii < num; ++ii) 738 { 739 dxbcHashBlock(data, hash); 740 data += 16; 741 } 742 743 uint32_t last[16]; 744 bx::memSet(last, 0, sizeof(last) ); 745 746 const uint32_t remaining = _size & 0x3f; 747 748 if (remaining >= 56) 749 { 750 bx::memCopy(&last[0], data, remaining); 751 last[remaining/4] = 0x80; 752 dxbcHashBlock(last, hash); 753 754 bx::memSet(&last[1], 0, 56); 755 } 756 else 757 { 758 bx::memCopy(&last[1], data, remaining); 759 last[1 + remaining/4] = 0x80; 760 } 761 762 last[ 0] = _size * 8; 763 last[15] = _size * 2 + 1; 764 dxbcHashBlock(last, hash); 765 766 bx::memCopy(_digest, hash, 16); 767 } 768 read(bx::ReaderI * _reader,DxbcSubOperand & _subOperand,bx::Error * _err)769 int32_t read(bx::ReaderI* _reader, DxbcSubOperand& _subOperand, bx::Error* _err) 770 { 771 uint32_t token; 772 int32_t size = 0; 773 774 // 0 1 2 3 775 // 76543210765432107654321076543210 776 // e222111000nnttttttttssssssssmmoo 777 // ^^ ^ ^ ^ ^ ^ ^ ^-- number of operands 778 // || | | | | | +---- operand mode 779 // || | | | | +------------ operand mode bits 780 // || | | | +-------------------- type 781 // || | | +---------------------- number of addressing modes 782 // || | +------------------------- addressing mode 0 783 // || +---------------------------- addressing mode 1 784 // |+------------------------------- addressing mode 2 785 // +-------------------------------- extended 786 787 size += bx::read(_reader, token, _err); 788 _subOperand.type = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12); 789 _subOperand.numAddrModes = uint8_t( (token & UINT32_C(0x00300000) ) >> 20); 790 _subOperand.addrMode = uint8_t( (token & 
UINT32_C(0x01c00000) ) >> 22); 791 _subOperand.mode = DxbcOperandMode::Enum( (token & UINT32_C(0x0000000c) ) >> 2); 792 _subOperand.modeBits = uint8_t( (token & UINT32_C(0x00000ff0) ) >> 4) & "\x0f\xff\x03\x00"[_subOperand.mode]; 793 _subOperand.num = uint8_t( (token & UINT32_C(0x00000003) ) ); 794 795 switch (_subOperand.addrMode) 796 { 797 case DxbcOperandAddrMode::Imm32: 798 size += bx::read(_reader, _subOperand.regIndex, _err); 799 break; 800 801 case DxbcOperandAddrMode::Reg: 802 { 803 DxbcSubOperand subOperand; 804 size += read(_reader, subOperand, _err); 805 } 806 break; 807 808 case DxbcOperandAddrMode::RegImm32: 809 { 810 size += bx::read(_reader, _subOperand.regIndex, _err); 811 812 DxbcSubOperand subOperand; 813 size += read(_reader, subOperand, _err); 814 } 815 break; 816 817 case DxbcOperandAddrMode::RegImm64: 818 { 819 size += bx::read(_reader, _subOperand.regIndex, _err); 820 size += bx::read(_reader, _subOperand.regIndex, _err); 821 822 DxbcSubOperand subOperand; 823 size += read(_reader, subOperand, _err); 824 } 825 break; 826 827 default: 828 BX_CHECK(false, "sub operand addressing mode %d", _subOperand.addrMode); 829 break; 830 } 831 832 return size; 833 } 834 write(bx::WriterI * _writer,const DxbcSubOperand & _subOperand,bx::Error * _err)835 int32_t write(bx::WriterI* _writer, const DxbcSubOperand& _subOperand, bx::Error* _err) 836 { 837 int32_t size = 0; 838 839 uint32_t token = 0; 840 token |= (_subOperand.type << 12) & UINT32_C(0x000ff000); 841 token |= (_subOperand.numAddrModes << 20) & UINT32_C(0x00300000); 842 token |= (_subOperand.addrMode << 22) & UINT32_C(0x01c00000); 843 token |= (_subOperand.mode << 2) & UINT32_C(0x0000000c); 844 token |= (_subOperand.modeBits << 4) & UINT32_C(0x00000ff0); 845 token |= _subOperand.num & UINT32_C(0x00000003); 846 size += bx::write(_writer, token, _err); 847 848 switch (_subOperand.addrMode) 849 { 850 case DxbcOperandAddrMode::Imm32: 851 size += bx::write(_writer, _subOperand.regIndex, _err); 852 break; 
853 854 case DxbcOperandAddrMode::Reg: 855 { 856 DxbcSubOperand subOperand; 857 size += write(_writer, subOperand, _err); 858 } 859 break; 860 861 case DxbcOperandAddrMode::RegImm32: 862 { 863 size += bx::write(_writer, _subOperand.regIndex, _err); 864 865 DxbcSubOperand subOperand; 866 size += write(_writer, subOperand, _err); 867 } 868 break; 869 870 case DxbcOperandAddrMode::RegImm64: 871 { 872 size += bx::write(_writer, _subOperand.regIndex, _err); 873 size += bx::write(_writer, _subOperand.regIndex, _err); 874 875 DxbcSubOperand subOperand; 876 size += write(_writer, subOperand, _err); 877 } 878 break; 879 880 default: 881 BX_CHECK(false, "sub operand addressing mode %d", _subOperand.addrMode); 882 break; 883 } 884 885 return size; 886 } 887 read(bx::ReaderI * _reader,DxbcOperand & _operand,bx::Error * _err)888 int32_t read(bx::ReaderI* _reader, DxbcOperand& _operand, bx::Error* _err) 889 { 890 int32_t size = 0; 891 892 uint32_t token; 893 size += bx::read(_reader, token, _err); 894 895 // 0 1 2 3 896 // 76543210765432107654321076543210 897 // e222111000nnttttttttssssssssmmoo 898 // ^^ ^ ^ ^ ^ ^ ^ ^-- number of operands 899 // || | | | | | +---- operand mode 900 // || | | | | +------------ operand mode bits 901 // || | | | +-------------------- type 902 // || | | +---------------------- number of addressing modes 903 // || | +------------------------- addressing mode 0 904 // || +---------------------------- addressing mode 1 905 // |+------------------------------- addressing mode 2 906 // +-------------------------------- extended 907 908 _operand.numAddrModes = uint8_t( (token & UINT32_C(0x00300000) ) >> 20); 909 _operand.addrMode[0] = uint8_t( (token & UINT32_C(0x01c00000) ) >> 22); 910 _operand.addrMode[1] = uint8_t( (token & UINT32_C(0x0e000000) ) >> 25); 911 _operand.addrMode[2] = uint8_t( (token & UINT32_C(0x70000000) ) >> 28); 912 _operand.type = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12); 913 _operand.mode = DxbcOperandMode::Enum( 
(token & UINT32_C(0x0000000c) ) >> 2); 914 _operand.modeBits = uint8_t( (token & UINT32_C(0x00000ff0) ) >> 4) & "\x0f\xff\x03\x00"[_operand.mode]; 915 _operand.num = uint8_t( (token & UINT32_C(0x00000003) ) ); 916 917 const bool extended = 0 != (token & UINT32_C(0x80000000) ); 918 if (extended) 919 { 920 uint32_t extBits = 0; 921 size += bx::read(_reader, extBits, _err); 922 923 _operand.modifier = DxbcOperandModifier::Enum( (extBits & UINT32_C(0x00003fc0) ) >> 6); 924 } 925 else 926 { 927 _operand.modifier = DxbcOperandModifier::None; 928 } 929 930 switch (_operand.type) 931 { 932 case DxbcOperandType::Imm32: 933 _operand.num = 2 == _operand.num ? 4 : _operand.num; 934 for (uint32_t ii = 0; ii < _operand.num; ++ii) 935 { 936 size += bx::read(_reader, _operand.un.imm32[ii], _err); 937 } 938 break; 939 940 case DxbcOperandType::Imm64: 941 _operand.num = 2 == _operand.num ? 4 : _operand.num; 942 for (uint32_t ii = 0; ii < _operand.num; ++ii) 943 { 944 size += bx::read(_reader, _operand.un.imm64[ii], _err); 945 } 946 break; 947 948 default: 949 break; 950 } 951 952 for (uint32_t ii = 0; ii < _operand.numAddrModes; ++ii) 953 { 954 switch (_operand.addrMode[ii]) 955 { 956 case DxbcOperandAddrMode::Imm32: 957 size += bx::read(_reader, _operand.regIndex[ii], _err); 958 break; 959 960 case DxbcOperandAddrMode::Reg: 961 size += read(_reader, _operand.subOperand[ii], _err); 962 break; 963 964 case DxbcOperandAddrMode::RegImm32: 965 size += bx::read(_reader, _operand.regIndex[ii], _err); 966 size += read(_reader, _operand.subOperand[ii], _err); 967 break; 968 969 default: 970 BX_CHECK(false, "operand %d addressing mode %d", ii, _operand.addrMode[ii]); 971 break; 972 } 973 } 974 975 return size; 976 } 977 write(bx::WriterI * _writer,const DxbcOperand & _operand,bx::Error * _err)978 int32_t write(bx::WriterI* _writer, const DxbcOperand& _operand, bx::Error* _err) 979 { 980 int32_t size = 0; 981 982 const bool extended = _operand.modifier != DxbcOperandModifier::None; 983 984 
uint32_t token = 0; 985 token |= extended ? UINT32_C(0x80000000) : 0; 986 token |= (_operand.numAddrModes << 20) & UINT32_C(0x00300000); 987 token |= (_operand.addrMode[0] << 22) & UINT32_C(0x01c00000); 988 token |= (_operand.addrMode[1] << 25) & UINT32_C(0x0e000000); 989 token |= (_operand.addrMode[2] << 28) & UINT32_C(0x70000000); 990 token |= (_operand.type << 12) & UINT32_C(0x000ff000); 991 token |= (_operand.mode << 2) & UINT32_C(0x0000000c); 992 993 token |= (4 == _operand.num ? 2 : _operand.num) & UINT32_C(0x00000003); 994 token |= ( (_operand.modeBits & "\x0f\xff\x03\x00"[_operand.mode]) << 4) & UINT32_C(0x00000ff0); 995 996 size += bx::write(_writer, token, _err); 997 998 if (extended) 999 { 1000 uint32_t extBits = 0 1001 | ( (_operand.modifier << 6) & UINT32_C(0x00003fc0) ) 1002 | 1 /* 1 == has extended operand modifier */ 1003 ; 1004 size += bx::write(_writer, extBits, _err); 1005 } 1006 1007 switch (_operand.type) 1008 { 1009 case DxbcOperandType::Imm32: 1010 for (uint32_t ii = 0; ii < _operand.num; ++ii) 1011 { 1012 size += bx::write(_writer, _operand.un.imm32[ii], _err); 1013 } 1014 break; 1015 1016 case DxbcOperandType::Imm64: 1017 for (uint32_t ii = 0; ii < _operand.num; ++ii) 1018 { 1019 size += bx::write(_writer, _operand.un.imm64[ii], _err); 1020 } 1021 break; 1022 1023 default: 1024 break; 1025 } 1026 1027 for (uint32_t ii = 0, num = bx::uint32_min(_operand.numAddrModes, BX_COUNTOF(_operand.addrMode) ); ii < num; ++ii) 1028 { 1029 switch (_operand.addrMode[ii]) 1030 { 1031 case DxbcOperandAddrMode::Imm32: 1032 size += bx::write(_writer, _operand.regIndex[ii], _err); 1033 break; 1034 1035 case DxbcOperandAddrMode::Reg: 1036 size += write(_writer, _operand.subOperand[ii], _err); 1037 break; 1038 1039 case DxbcOperandAddrMode::RegImm32: 1040 size += bx::write(_writer, _operand.regIndex[ii], _err); 1041 size += write(_writer, _operand.subOperand[ii], _err); 1042 break; 1043 1044 default: 1045 BX_CHECK(false, "operand %d addressing mode %d", ii, 
_operand.addrMode[ii]); 1046 break; 1047 } 1048 } 1049 1050 return size; 1051 } 1052 read(bx::ReaderI * _reader,DxbcInstruction & _instruction,bx::Error * _err)1053 int32_t read(bx::ReaderI* _reader, DxbcInstruction& _instruction, bx::Error* _err) 1054 { 1055 int32_t size = 0; 1056 1057 uint32_t token; 1058 size += bx::read(_reader, token, _err); 1059 1060 // 0 1 2 3 1061 // 76543210765432107654321076543210 1062 // elllllll.............ooooooooooo 1063 // ^^ ^----------- opcode 1064 // |+------------------------------- length 1065 // +-------------------------------- extended 1066 1067 _instruction.opcode = DxbcOpcode::Enum( (token & UINT32_C(0x000007ff) ) ); 1068 _instruction.length = uint8_t( (token & UINT32_C(0x7f000000) ) >> 24); 1069 bool extended = 0 != (token & UINT32_C(0x80000000) ); 1070 1071 _instruction.srv = DxbcResourceDim::Unknown; 1072 _instruction.samples = 0; 1073 1074 _instruction.shadow = false; 1075 _instruction.mono = false; 1076 1077 _instruction.allowRefactoring = false; 1078 _instruction.fp64 = false; 1079 _instruction.earlyDepth = false; 1080 _instruction.enableBuffers = false; 1081 _instruction.skipOptimization = false; 1082 _instruction.enableMinPrecision = false; 1083 _instruction.enableDoubleExtensions = false; 1084 _instruction.enableShaderExtensions = false; 1085 1086 _instruction.threadsInGroup = false; 1087 _instruction.sharedMemory = false; 1088 _instruction.uavGroup = false; 1089 _instruction.uavGlobal = false; 1090 1091 _instruction.saturate = false; 1092 _instruction.testNZ = false; 1093 _instruction.retType = DxbcResourceReturnType::Unused; 1094 1095 _instruction.customDataClass = DxbcCustomDataClass::Comment; 1096 _instruction.customData.clear(); 1097 1098 switch (_instruction.opcode) 1099 { 1100 case DxbcOpcode::CUSTOMDATA: 1101 { 1102 _instruction.customDataClass = DxbcCustomDataClass::Enum( (token & UINT32_C(0xfffff800) ) >> 11); 1103 1104 _instruction.numOperands = 0; 1105 size += bx::read(_reader, _instruction.length, 
_err); 1106 for (uint32_t ii = 0, num = (_instruction.length-2); ii < num && _err->isOk(); ++ii) 1107 { 1108 uint32_t temp; 1109 size += bx::read(_reader, temp, _err); 1110 if (_err->isOk() ) 1111 { 1112 _instruction.customData.push_back(temp); 1113 } 1114 } 1115 } 1116 return size; 1117 1118 case DxbcOpcode::DCL_CONSTANT_BUFFER: 1119 // 0 1 2 3 1120 // 76543210765432107654321076543210 1121 // ........ a........... 1122 // ^------------ Allow refactoring 1123 1124 _instruction.allowRefactoring = 0 != (token & UINT32_C(0x00000800) ); 1125 break; 1126 1127 case DxbcOpcode::DCL_GLOBAL_FLAGS: 1128 // 0 1 2 3 1129 // 76543210765432107654321076543210 1130 // ........ sxmoudfa........... 1131 // ^^^^^^^^------------ Allow refactoring 1132 // ||||||+------------- FP64 1133 // |||||+-------------- Force early depth/stencil 1134 // ||||+--------------- Enable raw and structured buffers 1135 // |||+---------------- Skip optimizations 1136 // ||+----------------- Enable minimum precision 1137 // |+------------------ Enable double extension 1138 // +------------------- Enable shader extension 1139 1140 _instruction.allowRefactoring = 0 != (token & UINT32_C(0x00000800) ); 1141 _instruction.fp64 = 0 != (token & UINT32_C(0x00001000) ); 1142 _instruction.earlyDepth = 0 != (token & UINT32_C(0x00002000) ); 1143 _instruction.enableBuffers = 0 != (token & UINT32_C(0x00004000) ); 1144 _instruction.skipOptimization = 0 != (token & UINT32_C(0x00008000) ); 1145 _instruction.enableMinPrecision = 0 != (token & UINT32_C(0x00010000) ); 1146 _instruction.enableDoubleExtensions = 0 != (token & UINT32_C(0x00020000) ); 1147 _instruction.enableShaderExtensions = 0 != (token & UINT32_C(0x00040000) ); 1148 break; 1149 1150 case DxbcOpcode::DCL_INPUT_PS: 1151 // 0 1 2 3 1152 // 76543210765432107654321076543210 1153 // ........ iiiii........... 
1154 // ^---------------- Interploation 1155 1156 _instruction.interpolation = DxbcInterpolation::Enum( (token & UINT32_C(0x0000f800) ) >> 11); 1157 break; 1158 1159 case DxbcOpcode::DCL_RESOURCE: 1160 // 0 1 2 3 1161 // 76543210765432107654321076543210 1162 // ........ sssssssrrrrr........... 1163 // ^ ^---------------- SRV 1164 // +----------------------- MSAA samples 1165 1166 _instruction.srv = DxbcResourceDim::Enum( (token & UINT32_C(0x0000f800) ) >> 11); 1167 _instruction.samples = uint8_t( (token & UINT32_C(0x007f0000) ) >> 16); 1168 break; 1169 1170 case DxbcOpcode::DCL_SAMPLER: 1171 // 0 1 2 3 1172 // 76543210765432107654321076543210 1173 // ........ ms........... 1174 // ^^------------ Shadow sampler 1175 // +------------- Mono 1176 1177 _instruction.shadow = 0 != (token & UINT32_C(0x00000800) ); 1178 _instruction.mono = 0 != (token & UINT32_C(0x00001000) ); 1179 break; 1180 1181 case DxbcOpcode::SYNC: 1182 // 0 1 2 3 1183 // 76543210765432107654321076543210 1184 // ........ gust........... 1185 // ^^^^------------ Threads in group 1186 // ||+------------- Shared memory 1187 // |+-------------- UAV group 1188 // +--------------- UAV global 1189 1190 _instruction.threadsInGroup = 0 != (token & UINT32_C(0x00000800) ); 1191 _instruction.sharedMemory = 0 != (token & UINT32_C(0x00001000) ); 1192 _instruction.uavGroup = 0 != (token & UINT32_C(0x00002000) ); 1193 _instruction.uavGlobal = 0 != (token & UINT32_C(0x00004000) ); 1194 break; 1195 1196 default: 1197 // 0 1 2 3 1198 // 76543210765432107654321076543210 1199 // ........ ppppn stt........... 
1200 // ^ ^ ^^------------- Resource info return type 1201 // | | +-------------- Saturate 1202 // | +------------------- Test not zero 1203 // +----------------------- Precise mask 1204 1205 _instruction.retType = DxbcResourceReturnType::Enum( (token & UINT32_C(0x00001800) ) >> 11); 1206 _instruction.saturate = 0 != (token & UINT32_C(0x00002000) ); 1207 _instruction.testNZ = 0 != (token & UINT32_C(0x00040000) ); 1208 // _instruction.precise = uint8_t( (token & UINT32_C(0x00780000) ) >> 19); 1209 break; 1210 } 1211 1212 _instruction.extended[0] = DxbcInstruction::ExtendedType::Count; 1213 for (uint32_t ii = 0; extended; ++ii) 1214 { 1215 // 0 1 2 3 1216 // 76543210765432107654321076543210 1217 // e..........................ttttt 1218 // ^ ^ 1219 // | +----- type 1220 // +-------------------------------- extended 1221 1222 uint32_t extBits; 1223 size += bx::read(_reader, extBits, _err); 1224 extended = 0 != (extBits & UINT32_C(0x80000000) ); 1225 _instruction.extended[ii ] = DxbcInstruction::ExtendedType::Enum(extBits & UINT32_C(0x0000001f) ); 1226 _instruction.extended[ii+1] = DxbcInstruction::ExtendedType::Count; 1227 1228 switch (_instruction.extended[ii]) 1229 { 1230 case DxbcInstruction::ExtendedType::SampleControls: 1231 // 0 1 2 3 1232 // 76543210765432107654321076543210 1233 // . zzzzyyyyxxxx ..... 1234 // ^ ^ ^ 1235 // | | +------------- x 1236 // | +----------------- y 1237 // +--------------------- z 1238 1239 _instruction.sampleOffsets[0] = uint8_t( (extBits & UINT32_C(0x00001e00) ) >> 9); 1240 _instruction.sampleOffsets[1] = uint8_t( (extBits & UINT32_C(0x0001e000) ) >> 13); 1241 _instruction.sampleOffsets[2] = uint8_t( (extBits & UINT32_C(0x001e0000) ) >> 17); 1242 break; 1243 1244 case DxbcInstruction::ExtendedType::ResourceDim: 1245 // 0 1 2 3 1246 // 76543210765432107654321076543210 1247 // . ..... 
1248 // 1249 1250 _instruction.resourceTarget = uint8_t( (extBits & UINT32_C(0x000003e0) ) >> 6); 1251 _instruction.resourceStride = uint8_t( (extBits & UINT32_C(0x0000f800) ) >> 11); 1252 break; 1253 1254 case DxbcInstruction::ExtendedType::ResourceReturnType: 1255 // 0 1 2 3 1256 // 76543210765432107654321076543210 1257 // . 3333222211110000..... 1258 // ^ ^ ^ 1259 // | | +------------- x 1260 // | +----------------- y 1261 // +--------------------- z 1262 1263 _instruction.resourceReturnTypes[0] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x000001e0) ) >> 6); 1264 _instruction.resourceReturnTypes[1] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x00001e00) ) >> 9); 1265 _instruction.resourceReturnTypes[2] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x0001e000) ) >> 13); 1266 _instruction.resourceReturnTypes[3] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x001e0000) ) >> 17); 1267 break; 1268 1269 default: 1270 break; 1271 } 1272 } 1273 1274 switch (_instruction.opcode) 1275 { 1276 case DxbcOpcode::DCL_FUNCTION_TABLE: 1277 { 1278 uint32_t tableId; 1279 size += read(_reader, tableId, _err); 1280 1281 uint32_t num; 1282 size += read(_reader, num); 1283 1284 for (uint32_t ii = 0; ii < num; ++ii) 1285 { 1286 uint32_t bodyId; 1287 size += read(_reader, bodyId, _err); 1288 } 1289 } 1290 break; 1291 1292 case DxbcOpcode::DCL_INTERFACE: 1293 { 1294 uint32_t interfaceId; 1295 size += read(_reader, interfaceId, _err); 1296 1297 uint32_t num; 1298 size += read(_reader, num, _err); 1299 1300 BX_CHECK(false, "not implemented."); 1301 } 1302 break; 1303 1304 default: 1305 break; 1306 }; 1307 1308 uint32_t currOp = 0; 1309 1310 const DxbcOpcodeInfo& info = s_dxbcOpcodeInfo[_instruction.opcode]; 1311 _instruction.numOperands = info.numOperands; 1312 switch (info.numOperands) 1313 { 1314 case 6: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH; 1315 case 5: size += read(_reader, _instruction.operand[currOp++], _err); 
BX_FALLTHROUGH; 1316 case 4: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH; 1317 case 3: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH; 1318 case 2: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH; 1319 case 1: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH; 1320 case 0: 1321 if (0 < info.numValues) 1322 { 1323 size += read(_reader, _instruction.value, info.numValues*sizeof(uint32_t), _err); 1324 } 1325 break; 1326 1327 default: 1328 BX_CHECK(false, "Instruction %s with invalid number of operands %d (numValues %d)." 1329 , getName(_instruction.opcode) 1330 , info.numOperands 1331 , info.numValues 1332 ); 1333 break; 1334 } 1335 1336 return size; 1337 } 1338 write(bx::WriterI * _writer,const DxbcInstruction & _instruction,bx::Error * _err)1339 int32_t write(bx::WriterI* _writer, const DxbcInstruction& _instruction, bx::Error* _err) 1340 { 1341 uint32_t token = 0; 1342 token |= (_instruction.opcode ) & UINT32_C(0x000007ff); 1343 token |= (_instruction.length << 24) & UINT32_C(0x7f000000); 1344 1345 token |= DxbcInstruction::ExtendedType::Count != _instruction.extended[0] 1346 ? UINT32_C(0x80000000) 1347 : 0 1348 ; 1349 1350 int32_t size =0; 1351 1352 switch (_instruction.opcode) 1353 { 1354 case DxbcOpcode::CUSTOMDATA: 1355 { 1356 token &= UINT32_C(0x000007ff); 1357 token |= _instruction.customDataClass << 11; 1358 1359 size += bx::write(_writer, token); 1360 1361 uint32_t len = uint32_t(_instruction.customData.size()*sizeof(uint32_t) ); 1362 size += bx::write(_writer, len/4+2, _err); 1363 size += bx::write(_writer, _instruction.customData.data(), len, _err); 1364 } 1365 return size; 1366 1367 case DxbcOpcode::DCL_CONSTANT_BUFFER: 1368 token |= _instruction.allowRefactoring ? UINT32_C(0x00000800) : 0; 1369 break; 1370 1371 case DxbcOpcode::DCL_GLOBAL_FLAGS: 1372 token |= _instruction.allowRefactoring ? 
UINT32_C(0x00000800) : 0; 1373 token |= _instruction.fp64 ? UINT32_C(0x00001000) : 0; 1374 token |= _instruction.earlyDepth ? UINT32_C(0x00002000) : 0; 1375 token |= _instruction.enableBuffers ? UINT32_C(0x00004000) : 0; 1376 token |= _instruction.skipOptimization ? UINT32_C(0x00008000) : 0; 1377 token |= _instruction.enableMinPrecision ? UINT32_C(0x00010000) : 0; 1378 token |= _instruction.enableDoubleExtensions ? UINT32_C(0x00020000) : 0; 1379 token |= _instruction.enableShaderExtensions ? UINT32_C(0x00040000) : 0; 1380 break; 1381 1382 case DxbcOpcode::DCL_INPUT_PS: 1383 token |= (_instruction.interpolation << 11) & UINT32_C(0x0000f800); 1384 break; 1385 1386 case DxbcOpcode::DCL_RESOURCE: 1387 token |= (_instruction.srv << 11) & UINT32_C(0x0000f800); 1388 token |= (_instruction.samples << 16) & UINT32_C(0x007f0000); 1389 break; 1390 1391 case DxbcOpcode::DCL_SAMPLER: 1392 token |= _instruction.shadow ? (0x00000800) : 0; 1393 token |= _instruction.mono ? (0x00001000) : 0; 1394 break; 1395 1396 case DxbcOpcode::SYNC: 1397 token |= _instruction.threadsInGroup ? UINT32_C(0x00000800) : 0; 1398 token |= _instruction.sharedMemory ? UINT32_C(0x00001000) : 0; 1399 token |= _instruction.uavGroup ? UINT32_C(0x00002000) : 0; 1400 token |= _instruction.uavGlobal ? UINT32_C(0x00004000) : 0; 1401 break; 1402 1403 default: 1404 token |= (_instruction.retType << 11) & UINT32_C(0x00001800); 1405 token |= _instruction.saturate ? UINT32_C(0x00002000) : 0; 1406 token |= _instruction.testNZ ? 
UINT32_C(0x00040000) : 0; 1407 // _instruction.precise = uint8_t( (token & UINT32_C(0x00780000) ) >> 19); 1408 break; 1409 } 1410 1411 size += bx::write(_writer, token); 1412 1413 for (uint32_t ii = 0; _instruction.extended[ii] != DxbcInstruction::ExtendedType::Count; ++ii) 1414 { 1415 // 0 1 2 3 1416 // 76543210765432107654321076543210 1417 // e..........................ttttt 1418 // ^ ^ 1419 // | +----- type 1420 // +-------------------------------- extended 1421 1422 token = _instruction.extended[ii+1] == DxbcInstruction::ExtendedType::Count 1423 ? 0 1424 : UINT32_C(0x80000000) 1425 ; 1426 token |= uint8_t(_instruction.extended[ii]); 1427 1428 switch (_instruction.extended[ii]) 1429 { 1430 case DxbcInstruction::ExtendedType::SampleControls: 1431 // 0 1 2 3 1432 // 76543210765432107654321076543210 1433 // . zzzzyyyyxxxx ..... 1434 // ^ ^ ^ 1435 // | | +------------- x 1436 // | +----------------- y 1437 // +--------------------- z 1438 1439 token |= (uint32_t(_instruction.sampleOffsets[0]) << 9) & UINT32_C(0x00001e00); 1440 token |= (uint32_t(_instruction.sampleOffsets[1]) << 13) & UINT32_C(0x0001e000); 1441 token |= (uint32_t(_instruction.sampleOffsets[2]) << 17) & UINT32_C(0x001e0000); 1442 break; 1443 1444 case DxbcInstruction::ExtendedType::ResourceDim: 1445 // 0 1 2 3 1446 // 76543210765432107654321076543210 1447 // . ..... 1448 // 1449 1450 token |= (uint32_t(_instruction.resourceTarget << 6) & UINT32_C(0x000003e0) ); 1451 token |= (uint32_t(_instruction.resourceStride << 11) & UINT32_C(0x0000f800) ); 1452 break; 1453 1454 case DxbcInstruction::ExtendedType::ResourceReturnType: 1455 // 0 1 2 3 1456 // 76543210765432107654321076543210 1457 // . 3333222211110000..... 
1458 // ^ ^ ^ 1459 // | | +------------- x 1460 // | +----------------- y 1461 // +--------------------- z 1462 1463 token |= (uint32_t(_instruction.resourceReturnTypes[0]) << 6) & UINT32_C(0x000001e0); 1464 token |= (uint32_t(_instruction.resourceReturnTypes[1]) << 9) & UINT32_C(0x00001e00); 1465 token |= (uint32_t(_instruction.resourceReturnTypes[2]) << 13) & UINT32_C(0x0001e000); 1466 token |= (uint32_t(_instruction.resourceReturnTypes[3]) << 17) & UINT32_C(0x001e0000); 1467 break; 1468 1469 default: 1470 break; 1471 } 1472 1473 size += bx::write(_writer, token, _err); 1474 } 1475 1476 for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii) 1477 { 1478 size += write(_writer, _instruction.operand[ii], _err); 1479 } 1480 1481 const DxbcOpcodeInfo& info = s_dxbcOpcodeInfo[_instruction.opcode]; 1482 if (0 < info.numValues) 1483 { 1484 size += bx::write(_writer, _instruction.value, info.numValues*sizeof(uint32_t), _err); 1485 } 1486 1487 return size; 1488 } 1489 toString(char * _out,int32_t _size,DxbcOperandMode::Enum _mode,uint8_t _modeBits)1490 int32_t toString(char* _out, int32_t _size, DxbcOperandMode::Enum _mode, uint8_t _modeBits) 1491 { 1492 int32_t size = 0; 1493 1494 switch (_mode) 1495 { 1496 case DxbcOperandMode::Mask: 1497 if (0xf > _modeBits 1498 && 0 < _modeBits) 1499 { 1500 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1501 , ".%s%s%s%s" 1502 , 0 == (_modeBits & 1) ? "" : "x" 1503 , 0 == (_modeBits & 2) ? "" : "y" 1504 , 0 == (_modeBits & 4) ? "" : "z" 1505 , 0 == (_modeBits & 8) ? 
"" : "w" 1506 ); 1507 } 1508 break; 1509 1510 case DxbcOperandMode::Swizzle: 1511 if (0xe4 != _modeBits) 1512 { 1513 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1514 , ".%c%c%c%c" 1515 , "xyzw"[(_modeBits )&0x3] 1516 , "xyzw"[(_modeBits>>2)&0x3] 1517 , "xyzw"[(_modeBits>>4)&0x3] 1518 , "xyzw"[(_modeBits>>6)&0x3] 1519 ); 1520 } 1521 break; 1522 1523 case DxbcOperandMode::Scalar: 1524 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1525 , ".%c" 1526 , "xyzw"[_modeBits] 1527 ); 1528 break; 1529 1530 default: 1531 break; 1532 } 1533 1534 return size; 1535 } 1536 toString(char * _out,int32_t _size,const DxbcInstruction & _instruction)1537 int32_t toString(char* _out, int32_t _size, const DxbcInstruction& _instruction) 1538 { 1539 int32_t size = 0; 1540 1541 switch (_instruction.opcode) 1542 { 1543 case DxbcOpcode::CUSTOMDATA: 1544 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1545 , "%s" 1546 , s_dxbcCustomDataClass[_instruction.customDataClass] 1547 ); 1548 break; 1549 1550 case DxbcOpcode::IF: 1551 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1552 , "%s%s" 1553 , getName(_instruction.opcode) 1554 , _instruction.testNZ ? "_nz" : "_z" 1555 ); 1556 break; 1557 1558 default: 1559 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1560 , "%s%s%s" 1561 , getName(_instruction.opcode) 1562 , _instruction.saturate ? "_sat" : "" 1563 , _instruction.testNZ ? 
"_nz" : "" 1564 ); 1565 break; 1566 } 1567 1568 if (DxbcResourceDim::Unknown != _instruction.srv) 1569 { 1570 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1571 , " %s<%x>" 1572 , s_dxbcSrvType[_instruction.srv] 1573 , _instruction.value[0] 1574 ); 1575 } 1576 else if (0 < s_dxbcOpcodeInfo[_instruction.opcode].numValues) 1577 { 1578 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1579 , " %d" 1580 , _instruction.value[0] 1581 ); 1582 } 1583 1584 for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii) 1585 { 1586 const DxbcOperand& operand = _instruction.operand[ii]; 1587 1588 const bool array = false 1589 || 1 < operand.numAddrModes 1590 || DxbcOperandAddrMode::Imm32 != operand.addrMode[0] 1591 ; 1592 1593 const char* preOperand = ""; 1594 const char* postOperand = ""; 1595 1596 switch (operand.modifier) 1597 { 1598 case DxbcOperandModifier::Neg: preOperand = "-"; postOperand = ""; break; 1599 case DxbcOperandModifier::Abs: preOperand = "abs("; postOperand = ")"; break; 1600 case DxbcOperandModifier::AbsNeg: preOperand = "-abs("; postOperand = ")"; break; 1601 default: break; 1602 } 1603 1604 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1605 , "%s%s%s" 1606 , 0 == ii ? " " : ", " 1607 , preOperand 1608 , s_dxbcOperandType[operand.type] 1609 ); 1610 1611 switch (operand.type) 1612 { 1613 case DxbcOperandType::Imm32: 1614 case DxbcOperandType::Imm64: 1615 for (uint32_t jj = 0; jj < operand.num; ++jj) 1616 { 1617 union { uint32_t i; float f; } cast = { operand.un.imm32[jj] }; 1618 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1619 , "%s%f" 1620 , 0 == jj ? 
"(" : ", " 1621 , cast.f 1622 ); 1623 } 1624 1625 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1626 , ")" 1627 ); 1628 break; 1629 1630 default: 1631 break; 1632 } 1633 1634 const uint32_t first = false 1635 || DxbcOperandType::ImmConstantBuffer == operand.type 1636 || DxbcOperandAddrMode::RegImm32 == operand.addrMode[0] 1637 ? 0 : 1 1638 ; 1639 if (0 == first) 1640 { 1641 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1642 , "[" 1643 ); 1644 } 1645 else 1646 { 1647 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1648 , "%d%s" 1649 , operand.regIndex[0] 1650 , array ? "[" : "" 1651 ); 1652 } 1653 1654 for (uint32_t jj = first, num = bx::uint32_min(operand.numAddrModes, BX_COUNTOF(operand.addrMode) ); jj < num; ++jj) 1655 { 1656 switch (operand.addrMode[jj]) 1657 { 1658 case DxbcOperandAddrMode::Imm32: 1659 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1660 , "%d" 1661 , operand.regIndex[jj] 1662 ); 1663 break; 1664 1665 case DxbcOperandAddrMode::Reg: 1666 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1667 , "%s%d" 1668 , s_dxbcOperandType[operand.subOperand[jj].type] 1669 , operand.subOperand[jj].regIndex 1670 ); 1671 size += toString(&_out[size], bx::uint32_imax(0, _size-size) 1672 , DxbcOperandMode::Enum(operand.subOperand[jj].mode) 1673 , operand.subOperand[jj].modeBits 1674 ); 1675 break; 1676 1677 case DxbcOperandAddrMode::RegImm32: 1678 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1679 , "%d + %s%d" 1680 , operand.regIndex[jj] 1681 , s_dxbcOperandType[operand.subOperand[jj].type] 1682 , operand.subOperand[jj].regIndex 1683 ); 1684 size += toString(&_out[size], bx::uint32_imax(0, _size-size) 1685 , DxbcOperandMode::Enum(operand.subOperand[jj].mode) 1686 , operand.subOperand[jj].modeBits 1687 ); 1688 break; 1689 1690 default: 1691 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size), "???"); 1692 break; 1693 } 1694 } 1695 1696 size += 
bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1697 , "%s" 1698 , array ? "]" : "" 1699 ); 1700 1701 size += toString(&_out[size], bx::uint32_imax(0, _size-size), operand.mode, operand.modeBits); 1702 1703 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1704 , "%s" 1705 , postOperand 1706 ); 1707 } 1708 1709 if (_instruction.opcode == DxbcOpcode::DCL_CONSTANT_BUFFER 1710 && _instruction.allowRefactoring) 1711 { 1712 size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) 1713 , ", dynamicIndexed" 1714 ); 1715 } 1716 1717 return size; 1718 } 1719 read(bx::ReaderSeekerI * _reader,DxbcSignature & _signature,bx::Error * _err)1720 int32_t read(bx::ReaderSeekerI* _reader, DxbcSignature& _signature, bx::Error* _err) 1721 { 1722 int32_t size = 0; 1723 1724 int64_t offset = bx::seek(_reader); 1725 1726 uint32_t num; 1727 size += bx::read(_reader, num, _err); 1728 size += bx::read(_reader, _signature.key, _err); 1729 1730 for (uint32_t ii = 0; ii < num; ++ii) 1731 { 1732 DxbcSignature::Element element; 1733 1734 uint32_t nameOffset; 1735 size += bx::read(_reader, nameOffset); 1736 1737 char name[DXBC_MAX_NAME_STRING]; 1738 readString(_reader, offset + nameOffset, name, DXBC_MAX_NAME_STRING, _err); 1739 element.name = name; 1740 1741 size += bx::read(_reader, element.semanticIndex, _err); 1742 size += bx::read(_reader, element.valueType, _err); 1743 size += bx::read(_reader, element.componentType, _err); 1744 size += bx::read(_reader, element.registerIndex, _err); 1745 size += bx::read(_reader, element.mask, _err); 1746 size += bx::read(_reader, element.readWriteMask, _err); 1747 size += bx::read(_reader, element.stream, _err); 1748 1749 // padding 1750 uint8_t padding; 1751 size += bx::read(_reader, padding, _err); 1752 1753 _signature.elements.push_back(element); 1754 } 1755 1756 return size; 1757 } 1758 write(bx::WriterI * _writer,const DxbcSignature & _signature,bx::Error * _err)1759 int32_t write(bx::WriterI* _writer, const 
DxbcSignature& _signature, bx::Error* _err) 1760 { 1761 int32_t size = 0; 1762 1763 const uint32_t num = uint32_t(_signature.elements.size() ); 1764 size += bx::write(_writer, num, _err); 1765 size += bx::write(_writer, _signature.key, _err); 1766 1767 typedef stl::unordered_map<stl::string, uint32_t> NameOffsetMap; 1768 NameOffsetMap nom; 1769 1770 const uint8_t pad = 0; 1771 uint32_t nameOffset = num * 24 + 8; 1772 for (uint32_t ii = 0; ii < num; ++ii) 1773 { 1774 const DxbcSignature::Element& element = _signature.elements[ii]; 1775 1776 NameOffsetMap::iterator it = nom.find(element.name); 1777 if (it == nom.end() ) 1778 { 1779 nom.insert(stl::make_pair(element.name, nameOffset) ); 1780 size += bx::write(_writer, nameOffset, _err); 1781 nameOffset += uint32_t(element.name.size() + 1); 1782 } 1783 else 1784 { 1785 size += bx::write(_writer, it->second); 1786 } 1787 1788 size += bx::write(_writer, element.semanticIndex, _err); 1789 size += bx::write(_writer, element.valueType, _err); 1790 size += bx::write(_writer, element.componentType, _err); 1791 size += bx::write(_writer, element.registerIndex, _err); 1792 size += bx::write(_writer, element.mask, _err); 1793 size += bx::write(_writer, element.readWriteMask, _err); 1794 size += bx::write(_writer, element.stream, _err); 1795 size += bx::write(_writer, pad, _err); 1796 } 1797 1798 uint32_t len = 0; 1799 for (uint32_t ii = 0; ii < num; ++ii) 1800 { 1801 const DxbcSignature::Element& element = _signature.elements[ii]; 1802 NameOffsetMap::iterator it = nom.find(element.name); 1803 if (it != nom.end() ) 1804 { 1805 nom.erase(it); 1806 size += bx::write(_writer, element.name.c_str(), uint32_t(element.name.size() + 1), _err); 1807 len += uint32_t(element.name.size() + 1); 1808 } 1809 } 1810 1811 // align 4 bytes 1812 size += bx::writeRep(_writer, 0xab, (len+3)/4*4 - len, _err); 1813 1814 return size; 1815 } 1816 read(bx::ReaderSeekerI * _reader,DxbcShader & _shader,bx::Error * _err)1817 int32_t read(bx::ReaderSeekerI* 
_reader, DxbcShader& _shader, bx::Error* _err) 1818 { 1819 int32_t size = 0; 1820 1821 size += bx::read(_reader, _shader.version, _err); 1822 1823 uint32_t bcLength; 1824 size += bx::read(_reader, bcLength, _err); 1825 1826 uint32_t len = (bcLength-2)*sizeof(uint32_t); 1827 _shader.byteCode.resize(len); 1828 size += bx::read(_reader, _shader.byteCode.data(), len, _err); 1829 1830 return size; 1831 } 1832 write(bx::WriterI * _writer,const DxbcShader & _shader,bx::Error * _err)1833 int32_t write(bx::WriterI* _writer, const DxbcShader& _shader, bx::Error* _err) 1834 { 1835 const uint32_t len = uint32_t(_shader.byteCode.size() ); 1836 const uint32_t bcLength = len / sizeof(uint32_t) + 2; 1837 1838 int32_t size = 0; 1839 size += bx::write(_writer, _shader.version, _err); 1840 size += bx::write(_writer, bcLength, _err); 1841 size += bx::write(_writer, _shader.byteCode.data(), len, _err); 1842 1843 return size; 1844 } 1845 1846 #define DXBC_CHUNK_SHADER BX_MAKEFOURCC('S', 'H', 'D', 'R') 1847 #define DXBC_CHUNK_SHADER_EX BX_MAKEFOURCC('S', 'H', 'E', 'X') 1848 1849 #define DXBC_CHUNK_INPUT_SIGNATURE BX_MAKEFOURCC('I', 'S', 'G', 'N') 1850 #define DXBC_CHUNK_OUTPUT_SIGNATURE BX_MAKEFOURCC('O', 'S', 'G', 'N') 1851 read(bx::ReaderSeekerI * _reader,DxbcContext & _dxbc,bx::Error * _err)1852 int32_t read(bx::ReaderSeekerI* _reader, DxbcContext& _dxbc, bx::Error* _err) 1853 { 1854 int32_t size = 0; 1855 size += bx::read(_reader, _dxbc.header, _err); 1856 _dxbc.shader.shex = false; 1857 _dxbc.shader.aon9 = false; 1858 1859 for (uint32_t ii = 0; ii < _dxbc.header.numChunks; ++ii) 1860 { 1861 bx::seek(_reader, sizeof(DxbcContext::Header) + ii*sizeof(uint32_t), bx::Whence::Begin); 1862 1863 uint32_t chunkOffset; 1864 size += bx::read(_reader, chunkOffset, _err); 1865 1866 bx::seek(_reader, chunkOffset, bx::Whence::Begin); 1867 1868 uint32_t fourcc; 1869 size += bx::read(_reader, fourcc, _err); 1870 1871 uint32_t chunkSize; 1872 size += bx::read(_reader, chunkSize, _err); 1873 1874 
switch (fourcc) 1875 { 1876 case DXBC_CHUNK_SHADER_EX: 1877 _dxbc.shader.shex = true; 1878 BX_FALLTHROUGH; 1879 1880 case DXBC_CHUNK_SHADER: 1881 size += read(_reader, _dxbc.shader, _err); 1882 break; 1883 1884 case BX_MAKEFOURCC('I', 'S', 'G', '1'): 1885 case DXBC_CHUNK_INPUT_SIGNATURE: 1886 size += read(_reader, _dxbc.inputSignature, _err); 1887 break; 1888 1889 case BX_MAKEFOURCC('O', 'S', 'G', '1'): 1890 case BX_MAKEFOURCC('O', 'S', 'G', '5'): 1891 case DXBC_CHUNK_OUTPUT_SIGNATURE: 1892 size += read(_reader, _dxbc.outputSignature, _err); 1893 break; 1894 1895 case BX_MAKEFOURCC('A', 'o', 'n', '9'): // Contains DX9BC for feature level 9.x (*s_4_0_level_9_*) shaders. 1896 _dxbc.shader.aon9 = true; 1897 break; 1898 1899 case BX_MAKEFOURCC('I', 'F', 'C', 'E'): // Interface. 1900 case BX_MAKEFOURCC('R', 'D', 'E', 'F'): // Resource definition. 1901 case BX_MAKEFOURCC('S', 'D', 'G', 'B'): // Shader debugging info (old). 1902 case BX_MAKEFOURCC('S', 'P', 'D', 'B'): // Shader debugging info (new). 1903 case BX_MAKEFOURCC('S', 'F', 'I', '0'): // ? 1904 case BX_MAKEFOURCC('S', 'T', 'A', 'T'): // Statistics. 1905 case BX_MAKEFOURCC('P', 'C', 'S', 'G'): // Patch constant signature. 1906 case BX_MAKEFOURCC('P', 'S', 'O', '1'): // Pipeline State Object 1 1907 case BX_MAKEFOURCC('P', 'S', 'O', '2'): // Pipeline State Object 2 1908 case BX_MAKEFOURCC('X', 'N', 'A', 'P'): // ? 1909 case BX_MAKEFOURCC('X', 'N', 'A', 'S'): // ? 
1910 size += chunkSize; 1911 break; 1912 1913 default: 1914 size += chunkSize; 1915 BX_CHECK(false, "UNKNOWN FOURCC %c%c%c%c %d" 1916 , ( (char*)&fourcc)[0] 1917 , ( (char*)&fourcc)[1] 1918 , ( (char*)&fourcc)[2] 1919 , ( (char*)&fourcc)[3] 1920 , size 1921 ); 1922 break; 1923 } 1924 } 1925 1926 return size; 1927 } 1928 write(bx::WriterSeekerI * _writer,const DxbcContext & _dxbc,bx::Error * _err)1929 int32_t write(bx::WriterSeekerI* _writer, const DxbcContext& _dxbc, bx::Error* _err) 1930 { 1931 int32_t size = 0; 1932 1933 int64_t dxbcOffset = bx::seek(_writer); 1934 size += bx::write(_writer, DXBC_CHUNK_HEADER); 1935 1936 size += bx::writeRep(_writer, 0, 16, _err); 1937 1938 size += bx::write(_writer, UINT32_C(1), _err); 1939 1940 int64_t sizeOffset = bx::seek(_writer); 1941 size += bx::writeRep(_writer, 0, 4, _err); 1942 1943 uint32_t numChunks = 3; 1944 size += bx::write(_writer, numChunks, _err); 1945 1946 int64_t chunksOffsets = bx::seek(_writer); 1947 size += bx::writeRep(_writer, 0, numChunks*sizeof(uint32_t), _err); 1948 1949 uint32_t chunkOffset[3]; 1950 uint32_t chunkSize[3]; 1951 1952 chunkOffset[0] = uint32_t(bx::seek(_writer) - dxbcOffset); 1953 size += write(_writer, DXBC_CHUNK_INPUT_SIGNATURE, _err); 1954 size += write(_writer, UINT32_C(0), _err); 1955 chunkSize[0] = write(_writer, _dxbc.inputSignature, _err); 1956 1957 chunkOffset[1] = uint32_t(bx::seek(_writer) - dxbcOffset); 1958 size += write(_writer, DXBC_CHUNK_OUTPUT_SIGNATURE, _err); 1959 size += write(_writer, UINT32_C(0), _err); 1960 chunkSize[1] = write(_writer, _dxbc.outputSignature, _err); 1961 1962 chunkOffset[2] = uint32_t(bx::seek(_writer) - dxbcOffset); 1963 size += write(_writer, _dxbc.shader.shex ? 
DXBC_CHUNK_SHADER_EX : DXBC_CHUNK_SHADER, _err); 1964 size += write(_writer, UINT32_C(0), _err); 1965 chunkSize[2] = write(_writer, _dxbc.shader, _err); 1966 1967 size += 0 1968 + chunkSize[0] 1969 + chunkSize[1] 1970 + chunkSize[2] 1971 ; 1972 1973 int64_t eof = bx::seek(_writer); 1974 1975 bx::seek(_writer, sizeOffset, bx::Whence::Begin); 1976 bx::write(_writer, size, _err); 1977 1978 bx::seek(_writer, chunksOffsets, bx::Whence::Begin); 1979 bx::write(_writer, chunkOffset, sizeof(chunkOffset), _err); 1980 1981 for (uint32_t ii = 0; ii < BX_COUNTOF(chunkOffset); ++ii) 1982 { 1983 bx::seek(_writer, chunkOffset[ii]+4, bx::Whence::Begin); 1984 bx::write(_writer, chunkSize[ii], _err); 1985 } 1986 1987 bx::seek(_writer, eof, bx::Whence::Begin); 1988 1989 return size; 1990 } 1991 parse(const DxbcShader & _src,DxbcParseFn _fn,void * _userData,bx::Error * _err)1992 void parse(const DxbcShader& _src, DxbcParseFn _fn, void* _userData, bx::Error* _err) 1993 { 1994 BX_ERROR_SCOPE(_err); 1995 1996 bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) ); 1997 1998 for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;) 1999 { 2000 DxbcInstruction instruction; 2001 uint32_t size = read(&reader, instruction, _err); 2002 BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); BX_UNUSED(size); 2003 2004 bool cont = _fn(token * sizeof(uint32_t), instruction, _userData); 2005 if (!cont) 2006 { 2007 return; 2008 } 2009 2010 token += instruction.length; 2011 } 2012 } 2013 filter(DxbcShader & _dst,const DxbcShader & _src,DxbcFilterFn _fn,void * _userData,bx::Error * _err)2014 void filter(DxbcShader& _dst, const DxbcShader& _src, DxbcFilterFn _fn, void* _userData, bx::Error* _err) 2015 { 2016 BX_ERROR_SCOPE(_err); 2017 2018 bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) ); 2019 2020 bx::MemoryBlock mb(g_allocator); 2021 bx::MemoryWriter 
writer(&mb); 2022 2023 int32_t total = 0; 2024 2025 for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;) 2026 { 2027 DxbcInstruction instruction; 2028 uint32_t size = read(&reader, instruction, _err); 2029 BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); BX_UNUSED(size); 2030 2031 _fn(instruction, _userData); 2032 2033 bx::SizerWriter sw; 2034 uint32_t length = instruction.length; 2035 instruction.length = uint32_t(write(&sw, instruction, _err)/4); 2036 2037 total += write(&writer, instruction, _err); 2038 token += length; 2039 } 2040 2041 uint8_t* data = (uint8_t*)mb.more(); 2042 _dst.byteCode.resize(total); 2043 bx::memCopy(_dst.byteCode.data(), data, total); 2044 } 2045 2046 } // namespace bgfx 2047