1 /*
2  * Copyright 2011-2019 Branimir Karadzic. All rights reserved.
3  * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
4  */
5 
6 #include "bgfx_p.h"
7 #include "shader_dxbc.h"
8 
9 namespace bgfx
10 {
	// One row of the s_dxbcOpcodeInfo table: static per-opcode counts.
	struct DxbcOpcodeInfo
	{
		// Number of operands listed for the opcode.
		uint8_t numOperands;

		// Number of extra values listed for the opcode. The consumer of this
		// field is not visible in this part of the file -- presumably raw
		// 32-bit words that follow the operands; TODO confirm.
		uint8_t numValues;
	};
16 
	// { numOperands, numValues } for every opcode. The trailing comment on each
	// row names the opcode the row belongs to, and the static assert below the
	// table checks that there is exactly one row per DxbcOpcode::Count.
	//
	// The rows labelled InstrD3D10, InstrD3D10_1 and InstrD3D11 are not
	// instructions; they appear to be marker values inside the enum that
	// separate the instruction sets (s_dxbcOpcode holds NULL at the same
	// positions).
	//
	// NOTE(review): many rows are { 0, 0 } for opcodes that normally carry
	// operands (e.g. CALL, SAMPLE_POS, BUFINFO, IMM_ATOMIC_*). They look like
	// not-yet-filled placeholders -- confirm before relying on them.
	static const DxbcOpcodeInfo s_dxbcOpcodeInfo[] =
	{
		{ 3, 0 }, // ADD
		{ 3, 0 }, // AND
		{ 0, 0 }, // BREAK
		{ 1, 0 }, // BREAKC
		{ 0, 0 }, // CALL
		{ 0, 0 }, // CALLC
		{ 1, 0 }, // CASE
		{ 0, 0 }, // CONTINUE
		{ 1, 0 }, // CONTINUEC
		{ 0, 0 }, // CUT
		{ 0, 0 }, // DEFAULT
		{ 2, 0 }, // DERIV_RTX
		{ 2, 0 }, // DERIV_RTY
		{ 1, 0 }, // DISCARD
		{ 3, 0 }, // DIV
		{ 3, 0 }, // DP2
		{ 3, 0 }, // DP3
		{ 3, 0 }, // DP4
		{ 0, 0 }, // ELSE
		{ 0, 0 }, // EMIT
		{ 0, 0 }, // EMITTHENCUT
		{ 0, 0 }, // ENDIF
		{ 0, 0 }, // ENDLOOP
		{ 0, 0 }, // ENDSWITCH
		{ 3, 0 }, // EQ
		{ 2, 0 }, // EXP
		{ 2, 0 }, // FRC
		{ 2, 0 }, // FTOI
		{ 2, 0 }, // FTOU
		{ 3, 0 }, // GE
		{ 3, 0 }, // IADD
		{ 1, 0 }, // IF
		{ 3, 0 }, // IEQ
		{ 3, 0 }, // IGE
		{ 3, 0 }, // ILT
		{ 4, 0 }, // IMAD
		{ 3, 0 }, // IMAX
		{ 3, 0 }, // IMIN
		{ 4, 0 }, // IMUL
		{ 3, 0 }, // INE
		{ 2, 0 }, // INEG
		{ 3, 0 }, // ISHL
		{ 3, 0 }, // ISHR
		{ 2, 0 }, // ITOF
		{ 0, 0 }, // LABEL
		{ 3, 0 }, // LD
		{ 4, 0 }, // LD_MS
		{ 2, 0 }, // LOG
		{ 0, 0 }, // LOOP
		{ 3, 0 }, // LT
		{ 4, 0 }, // MAD
		{ 3, 0 }, // MIN
		{ 3, 0 }, // MAX
		{ 0, 1 }, // CUSTOMDATA
		{ 2, 0 }, // MOV
		{ 4, 0 }, // MOVC
		{ 3, 0 }, // MUL
		{ 3, 0 }, // NE
		{ 0, 0 }, // NOP
		{ 2, 0 }, // NOT
		{ 3, 0 }, // OR
		{ 3, 0 }, // RESINFO
		{ 0, 0 }, // RET
		{ 1, 0 }, // RETC
		{ 2, 0 }, // ROUND_NE
		{ 2, 0 }, // ROUND_NI
		{ 2, 0 }, // ROUND_PI
		{ 2, 0 }, // ROUND_Z
		{ 2, 0 }, // RSQ
		{ 4, 0 }, // SAMPLE
		{ 5, 0 }, // SAMPLE_C
		{ 5, 0 }, // SAMPLE_C_LZ
		{ 5, 0 }, // SAMPLE_L
		{ 6, 0 }, // SAMPLE_D
		{ 5, 0 }, // SAMPLE_B
		{ 2, 0 }, // SQRT
		{ 1, 0 }, // SWITCH
		{ 3, 0 }, // SINCOS
		{ 4, 0 }, // UDIV
		{ 3, 0 }, // ULT
		{ 3, 0 }, // UGE
		{ 4, 0 }, // UMUL
		{ 4, 0 }, // UMAD
		{ 3, 0 }, // UMAX
		{ 3, 0 }, // UMIN
		{ 3, 0 }, // USHR
		{ 2, 0 }, // UTOF
		{ 3, 0 }, // XOR
		{ 1, 1 }, // DCL_RESOURCE
		{ 1, 0 }, // DCL_CONSTANT_BUFFER
		{ 1, 0 }, // DCL_SAMPLER
		{ 1, 1 }, // DCL_INDEX_RANGE
		{ 1, 0 }, // DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY
		{ 1, 0 }, // DCL_GS_INPUT_PRIMITIVE
		{ 0, 1 }, // DCL_MAX_OUTPUT_VERTEX_COUNT
		{ 1, 0 }, // DCL_INPUT
		{ 1, 1 }, // DCL_INPUT_SGV
		{ 1, 0 }, // DCL_INPUT_SIV
		{ 1, 0 }, // DCL_INPUT_PS
		{ 1, 1 }, // DCL_INPUT_PS_SGV
		{ 1, 1 }, // DCL_INPUT_PS_SIV
		{ 1, 0 }, // DCL_OUTPUT
		{ 1, 0 }, // DCL_OUTPUT_SGV
		{ 1, 1 }, // DCL_OUTPUT_SIV
		{ 0, 1 }, // DCL_TEMPS
		{ 0, 3 }, // DCL_INDEXABLE_TEMP
		{ 0, 0 }, // DCL_GLOBAL_FLAGS

		{ 0, 0 }, // InstrD3D10
		{ 4, 0 }, // LOD
		{ 4, 0 }, // GATHER4
		{ 0, 0 }, // SAMPLE_POS
		{ 0, 0 }, // SAMPLE_INFO

		{ 0, 0 }, // InstrD3D10_1
		{ 0, 0 }, // HS_DECLS
		{ 0, 0 }, // HS_CONTROL_POINT_PHASE
		{ 0, 0 }, // HS_FORK_PHASE
		{ 0, 0 }, // HS_JOIN_PHASE
		{ 0, 0 }, // EMIT_STREAM
		{ 0, 0 }, // CUT_STREAM
		{ 1, 0 }, // EMITTHENCUT_STREAM
		{ 1, 0 }, // INTERFACE_CALL
		{ 0, 0 }, // BUFINFO
		{ 2, 0 }, // DERIV_RTX_COARSE
		{ 2, 0 }, // DERIV_RTX_FINE
		{ 2, 0 }, // DERIV_RTY_COARSE
		{ 2, 0 }, // DERIV_RTY_FINE
		{ 5, 0 }, // GATHER4_C
		{ 5, 0 }, // GATHER4_PO
		{ 0, 0 }, // GATHER4_PO_C
		{ 2, 0 }, // RCP
		{ 0, 0 }, // F32TOF16
		{ 0, 0 }, // F16TOF32
		{ 0, 0 }, // UADDC
		{ 0, 0 }, // USUBB
		{ 0, 0 }, // COUNTBITS
		{ 0, 0 }, // FIRSTBIT_HI
		{ 0, 0 }, // FIRSTBIT_LO
		{ 0, 0 }, // FIRSTBIT_SHI
		{ 4, 0 }, // UBFE
		{ 4, 0 }, // IBFE
		{ 5, 0 }, // BFI
		{ 0, 0 }, // BFREV
		{ 5, 0 }, // SWAPC
		{ 0, 0 }, // DCL_STREAM
		{ 1, 0 }, // DCL_FUNCTION_BODY
		{ 0, 0 }, // DCL_FUNCTION_TABLE
		{ 0, 0 }, // DCL_INTERFACE
		{ 0, 0 }, // DCL_INPUT_CONTROL_POINT_COUNT
		{ 0, 0 }, // DCL_OUTPUT_CONTROL_POINT_COUNT
		{ 0, 0 }, // DCL_TESS_DOMAIN
		{ 0, 0 }, // DCL_TESS_PARTITIONING
		{ 0, 0 }, // DCL_TESS_OUTPUT_PRIMITIVE
		{ 0, 0 }, // DCL_HS_MAX_TESSFACTOR
		{ 0, 0 }, // DCL_HS_FORK_PHASE_INSTANCE_COUNT
		{ 0, 0 }, // DCL_HS_JOIN_PHASE_INSTANCE_COUNT
		{ 0, 3 }, // DCL_THREAD_GROUP
		{ 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_TYPED
		{ 1, 0 }, // DCL_UNORDERED_ACCESS_VIEW_RAW
		{ 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_STRUCTURED
		{ 1, 1 }, // DCL_THREAD_GROUP_SHARED_MEMORY_RAW
		{ 1, 2 }, // DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED
		{ 1, 0 }, // DCL_RESOURCE_RAW
		{ 1, 1 }, // DCL_RESOURCE_STRUCTURED
		{ 3, 0 }, // LD_UAV_TYPED
		{ 3, 0 }, // STORE_UAV_TYPED
		{ 3, 0 }, // LD_RAW
		{ 3, 0 }, // STORE_RAW
		{ 4, 0 }, // LD_STRUCTURED
		{ 4, 0 }, // STORE_STRUCTURED
		{ 3, 0 }, // ATOMIC_AND
		{ 3, 0 }, // ATOMIC_OR
		{ 3, 0 }, // ATOMIC_XOR
		{ 3, 0 }, // ATOMIC_CMP_STORE
		{ 3, 0 }, // ATOMIC_IADD
		{ 3, 0 }, // ATOMIC_IMAX
		{ 3, 0 }, // ATOMIC_IMIN
		{ 3, 0 }, // ATOMIC_UMAX
		{ 3, 0 }, // ATOMIC_UMIN
		{ 2, 0 }, // IMM_ATOMIC_ALLOC
		{ 2, 0 }, // IMM_ATOMIC_CONSUME
		{ 0, 0 }, // IMM_ATOMIC_IADD
		{ 0, 0 }, // IMM_ATOMIC_AND
		{ 0, 0 }, // IMM_ATOMIC_OR
		{ 0, 0 }, // IMM_ATOMIC_XOR
		{ 0, 0 }, // IMM_ATOMIC_EXCH
		{ 0, 0 }, // IMM_ATOMIC_CMP_EXCH
		{ 0, 0 }, // IMM_ATOMIC_IMAX
		{ 0, 0 }, // IMM_ATOMIC_IMIN
		{ 0, 0 }, // IMM_ATOMIC_UMAX
		{ 0, 0 }, // IMM_ATOMIC_UMIN
		{ 0, 0 }, // SYNC
		{ 3, 0 }, // DADD
		{ 3, 0 }, // DMAX
		{ 3, 0 }, // DMIN
		{ 3, 0 }, // DMUL
		{ 3, 0 }, // DEQ
		{ 3, 0 }, // DGE
		{ 3, 0 }, // DLT
		{ 3, 0 }, // DNE
		{ 2, 0 }, // DMOV
		{ 4, 0 }, // DMOVC
		{ 0, 0 }, // DTOF
		{ 0, 0 }, // FTOD
		{ 3, 0 }, // EVAL_SNAPPED
		{ 3, 0 }, // EVAL_SAMPLE_INDEX
		{ 2, 0 }, // EVAL_CENTROID
		{ 0, 1 }, // DCL_GS_INSTANCE_COUNT
		{ 0, 0 }, // ABORT
		{ 0, 0 }, // DEBUG_BREAK

		{ 0, 0 }, // InstrD3D11
		{ 0, 0 }, // DDIV
		{ 0, 0 }, // DFMA
		{ 0, 0 }, // DRCP
		{ 0, 0 }, // MSAD
		{ 0, 0 }, // DTOI
		{ 0, 0 }, // DTOU
		{ 0, 0 }, // ITOD
		{ 0, 0 }, // UTOD
	};
	// Compile-time guard: adding an opcode to the enum without a row here fails the build.
	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcodeInfo) == DxbcOpcode::Count);
242 
	// Assembly mnemonic for every opcode, returned by getName(). The static
	// assert below the table checks that there is one entry per
	// DxbcOpcode::Count, and the entries follow the same order as the rows of
	// s_dxbcOpcodeInfo.
	//
	// The three NULL entries sit at the positions s_dxbcOpcodeInfo labels
	// InstrD3D10, InstrD3D10_1 and InstrD3D11; those have no mnemonic, so a
	// lookup at one of these indices yields NULL.
	static const char* s_dxbcOpcode[] =
	{
		"add",
		"and",
		"break",
		"breakc",
		"call",
		"callc",
		"case",
		"continue",
		"continuec",
		"cut",
		"default",
		"deriv_rtx",
		"deriv_rty",
		"discard",
		"div",
		"dp2",
		"dp3",
		"dp4",
		"else",
		"emit",
		"emitthencut",
		"endif",
		"endloop",
		"endswitch",
		"eq",
		"exp",
		"frc",
		"ftoi",
		"ftou",
		"ge",
		"iadd",
		"if",
		"ieq",
		"ige",
		"ilt",
		"imad",
		"imax",
		"imin",
		"imul",
		"ine",
		"ineg",
		"ishl",
		"ishr",
		"itof",
		"label",
		"ld",
		"ld_ms",
		"log",
		"loop",
		"lt",
		"mad",
		"min",
		"max",
		"customdata",
		"mov",
		"movc",
		"mul",
		"ne",
		"nop",
		"not",
		"or",
		"resinfo",
		"ret",
		"retc",
		"round_ne",
		"round_ni",
		"round_pi",
		"round_z",
		"rsq",
		"sample",
		"sample_c",
		"sample_c_lz",
		"sample_l",
		"sample_d",
		"sample_b",
		"sqrt",
		"switch",
		"sincos",
		"udiv",
		"ult",
		"uge",
		"umul",
		"umad",
		"umax",
		"umin",
		"ushr",
		"utof",
		"xor",
		"dcl_resource",
		"dcl_constantbuffer",
		"dcl_sampler",
		"dcl_index_range",
		"dcl_gs_output_primitive_topology",
		"dcl_gs_input_primitive",
		"dcl_max_output_vertex_count",
		"dcl_input",
		"dcl_input_sgv",
		"dcl_input_siv",
		"dcl_input_ps",
		"dcl_input_ps_sgv",
		"dcl_input_ps_siv",
		"dcl_output",
		"dcl_output_sgv",
		"dcl_output_siv",
		"dcl_temps",
		"dcl_indexable_temp",
		"dcl_global_flags",

		NULL,
		"lod",
		"gather4",
		"sample_pos",
		"sample_info",

		NULL,
		"hs_decls",
		"hs_control_point_phase",
		"hs_fork_phase",
		"hs_join_phase",
		"emit_stream",
		"cut_stream",
		"emitthencut_stream",
		"interface_call",
		"bufinfo",
		"deriv_rtx_coarse",
		"deriv_rtx_fine",
		"deriv_rty_coarse",
		"deriv_rty_fine",
		"gather4_c",
		"gather4_po",
		"gather4_po_c",
		"rcp",
		"f32tof16",
		"f16tof32",
		"uaddc",
		"usubb",
		"countbits",
		"firstbit_hi",
		"firstbit_lo",
		"firstbit_shi",
		"ubfe",
		"ibfe",
		"bfi",
		"bfrev",
		"swapc",
		"dcl_stream",
		"dcl_function_body",
		"dcl_function_table",
		"dcl_interface",
		"dcl_input_control_point_count",
		"dcl_output_control_point_count",
		"dcl_tess_domain",
		"dcl_tess_partitioning",
		"dcl_tess_output_primitive",
		"dcl_hs_max_tessfactor",
		"dcl_hs_fork_phase_instance_count",
		"dcl_hs_join_phase_instance_count",
		"dcl_thread_group",
		"dcl_unordered_access_view_typed",
		"dcl_unordered_access_view_raw",
		"dcl_unordered_access_view_structured",
		"dcl_thread_group_shared_memory_raw",
		"dcl_thread_group_shared_memory_structured",
		"dcl_resource_raw",
		"dcl_resource_structured",
		"ld_uav_typed",
		"store_uav_typed",
		"ld_raw",
		"store_raw",
		"ld_structured",
		"store_structured",
		"atomic_and",
		"atomic_or",
		"atomic_xor",
		"atomic_cmp_store",
		"atomic_iadd",
		"atomic_imax",
		"atomic_imin",
		"atomic_umax",
		"atomic_umin",
		"imm_atomic_alloc",
		"imm_atomic_consume",
		"imm_atomic_iadd",
		"imm_atomic_and",
		"imm_atomic_or",
		"imm_atomic_xor",
		"imm_atomic_exch",
		"imm_atomic_cmp_exch",
		"imm_atomic_imax",
		"imm_atomic_imin",
		"imm_atomic_umax",
		"imm_atomic_umin",
		"sync",
		"dadd",
		"dmax",
		"dmin",
		"dmul",
		"deq",
		"dge",
		"dlt",
		"dne",
		"dmov",
		"dmovc",
		"dtof",
		"ftod",
		"eval_snapped",
		"eval_sample_index",
		"eval_centroid",
		"dcl_gs_instance_count",
		"abort",
		"debug_break",

		NULL,
		"ddiv",
		"dfma",
		"drcp",
		"msad",
		"dtoi",
		"dtou",
		"itod",
		"utod",
	};
	// Compile-time guard: the mnemonic table must stay in step with the opcode enum.
	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcode) == DxbcOpcode::Count);
468 
getName(DxbcOpcode::Enum _opcode)469 	const char* getName(DxbcOpcode::Enum _opcode)
470 	{
471 		BX_CHECK(_opcode < DxbcOpcode::Count, "Unknown opcode id %d.", _opcode);
472 		return s_dxbcOpcode[_opcode];
473 	}
474 
	// Display name per resource dimension; the trailing comment on each entry
	// names the dimension it stands for. The static assert below checks that
	// there is one entry per DxbcResourceDim::Count. The first entry is the
	// empty string.
	static const char* s_dxbcSrvType[] =
	{
		"",                 // Unknown
		"Buffer",           // Buffer
		"Texture1D",        // Texture1D
		"Texture2D",        // Texture2D
		"Texture2DMS",      // Texture2DMS
		"Texture3D",        // Texture3D
		"TextureCube",      // TextureCube
		"Texture1DArray",   // Texture1DArray
		"Texture2DArray",   // Texture2DArray
		"Texture2DMSArray", // Texture2DMSArray
		"TextureCubearray", // TextureCubearray
		"RawBuffer",        // RawBuffer
		"StructuredBuffer", // StructuredBuffer
	};
	// Compile-time guard: one name per resource dimension.
	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcSrvType) == DxbcResourceDim::Count);
492 
	// Interpolation-mode names; the static assert below checks that there is
	// one entry per DxbcInterpolation::Count. The first entry is the empty
	// string.
	//
	// NOTE(review): unlike the neighbouring tables this array is not declared
	// `static`, so it has external linkage. Confirm whether another
	// translation unit refers to it before changing that.
	const char* s_dxbcInterpolationName[] =
	{
		"",
		"constant",
		"linear",
		"linear centroid",
		"linear noperspective",
		"linear noperspective centroid",
		"linear sample",
		"linear noperspective sample",
	};
	// Compile-time guard: one name per interpolation mode.
	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcInterpolationName) == DxbcInterpolation::Count);
505 
506 	// mesa/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/shortfiles.txt
507 	static const char* s_dxbcOperandType[] =
508 	{
509 		"r",                         // Temp
510 		"v",                         // Input
511 		"o",                         // Output
512 		"x",                         // TempArray
513 		"l",                         // Imm32
514 		"d",                         // Imm64
515 		"s",                         // Sampler
516 		"t",                         // Resource
517 		"cb",                        // ConstantBuffer
518 		"icb",                       // ImmConstantBuffer
519 		"label",                     // Label
520 		"vPrim",                     // PrimitiveID
521 		"oDepth",                    // OutputDepth
522 		"null",                      // Null
523 		"rasterizer",                // Rasterizer
524 		"oMask",                     // CoverageMask
525 		"stream",                    // Stream
526 		"function_body",             // FunctionBody
527 		"function_table",            // FunctionTable
528 		"interface",                 // Interface
529 		"function_input",            // FunctionInput
530 		"function_output",           // FunctionOutput
531 		"vOutputControlPointID",     // OutputControlPointId
532 		"vForkInstanceID",           // InputForkInstanceId
533 		"vJoinInstanceID",           // InputJoinInstanceId
534 		"vicp",                      // InputControlPoint
535 		"vocp",                      // OutputControlPoint
536 		"vpc",                       // InputPatchConstant
537 		"vDomain",                   // InputDomainPoint
538 		"this",                      // ThisPointer
539 		"u",                         // UnorderedAccessView
540 		"g",                         // ThreadGroupSharedMemory
541 		"vThreadID",                 // InputThreadId
542 		"vThreadGrouID",             // InputThreadGroupId
543 		"vThreadIDInGroup",          // InputThreadIdInGroup
544 		"vCoverage",                 // InputCoverageMask
545 		"vThreadIDInGroupFlattened", // InputThreadIdInGroupFlattened
546 		"vGSInstanceID",             // InputGsInstanceId
547 		"oDepthGE",                  // OutputDepthGreaterEqual
548 		"oDepthLE",                  // OutputDepthLessEqual
549 		"vCycleCounter",             // CycleCounter
550 	};
551 	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOperandType) == DxbcOperandType::Count);
552 
	// Name per custom-data class; the static assert below checks that there is
	// one entry per DxbcCustomDataClass::Count. Presumably used when printing
	// CUSTOMDATA blocks -- the consumer is not visible in this part of the file.
	static const char* s_dxbcCustomDataClass[] =
	{
		"Comment",
		"DebugInfo",
		"Opaque",
		"dcl_immediateConstantBuffer",
		"ShaderMessage",
		"ClipPlaneConstantMappingsForDx9",
	};
	// Compile-time guard: one name per custom-data class.
	BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcCustomDataClass) == DxbcCustomDataClass::Count);
563 
// Size limit for name strings. Not referenced in this part of the file;
// presumably the buffer capacity callers pass to readString() -- TODO confirm.
#define DXBC_MAX_NAME_STRING 512
565 
readString(bx::ReaderSeekerI * _reader,int64_t _offset,char * _out,uint32_t _max,bx::Error * _err)566 	int32_t readString(bx::ReaderSeekerI* _reader, int64_t _offset, char* _out, uint32_t _max, bx::Error* _err)
567 	{
568 		int64_t oldOffset = bx::seek(_reader);
569 		bx::seek(_reader, _offset, bx::Whence::Begin);
570 
571 		int32_t size = 0;
572 
573 		for (uint32_t ii = 0; ii < _max-1; ++ii)
574 		{
575 			char ch;
576 			size += bx::read(_reader, ch, _err);
577 			*_out++ = ch;
578 
579 			if ('\0' == ch)
580 			{
581 				break;
582 			}
583 		}
584 		*_out = '\0';
585 
586 		bx::seek(_reader, oldOffset, bx::Whence::Begin);
587 
588 		return size;
589 	}
590 
dxbcMixF(uint32_t _b,uint32_t _c,uint32_t _d)591 	inline uint32_t dxbcMixF(uint32_t _b, uint32_t _c, uint32_t _d)
592 	{
593 		const uint32_t tmp0   = bx::uint32_xor(_c, _d);
594 		const uint32_t tmp1   = bx::uint32_and(_b, tmp0);
595 		const uint32_t result = bx::uint32_xor(_d, tmp1);
596 
597 		return result;
598 	}
599 
dxbcMixG(uint32_t _b,uint32_t _c,uint32_t _d)600 	inline uint32_t dxbcMixG(uint32_t _b, uint32_t _c, uint32_t _d)
601 	{
602 		return dxbcMixF(_d, _b, _c);
603 	}
604 
dxbcMixH(uint32_t _b,uint32_t _c,uint32_t _d)605 	inline uint32_t dxbcMixH(uint32_t _b, uint32_t _c, uint32_t _d)
606 	{
607 		const uint32_t tmp0   = bx::uint32_xor(_b, _c);
608 		const uint32_t result = bx::uint32_xor(_d, tmp0);
609 
610 		return result;
611 	}
612 
dxbcMixI(uint32_t _b,uint32_t _c,uint32_t _d)613 	inline uint32_t dxbcMixI(uint32_t _b, uint32_t _c, uint32_t _d)
614 	{
615 		const uint32_t tmp0   = bx::uint32_orc(_b, _d);
616 		const uint32_t result = bx::uint32_xor(_c, tmp0);
617 
618 		return result;
619 	}
620 
	// Mixes one block of sixteen 32-bit words into the four-word hash state.
	//
	// data - 16 input words (data[0]..data[15] are read).
	// hash - 4 state words; read at the start and incremented by the result
	//        at the end.
	//
	// The body is four rounds of sixteen steps. Each round uses one mixing
	// function (dxbcMixF, dxbcMixG, dxbcMixH, dxbcMixI in that order), visits
	// the input words in its own order and adds a distinct constant per step.
	// Some steps are written as right rotations; for a 32-bit rotate,
	// ror(x, n) is the same as rol(x, 32 - n).
	void dxbcHashBlock(const uint32_t* data, uint32_t* hash)
	{
		// Load the whole block up front.
		const uint32_t d0  = data[ 0];
		const uint32_t d1  = data[ 1];
		const uint32_t d2  = data[ 2];
		const uint32_t d3  = data[ 3];
		const uint32_t d4  = data[ 4];
		const uint32_t d5  = data[ 5];
		const uint32_t d6  = data[ 6];
		const uint32_t d7  = data[ 7];
		const uint32_t d8  = data[ 8];
		const uint32_t d9  = data[ 9];
		const uint32_t d10 = data[10];
		const uint32_t d11 = data[11];
		const uint32_t d12 = data[12];
		const uint32_t d13 = data[13];
		const uint32_t d14 = data[14];
		const uint32_t d15 = data[15];

		// Work on a copy of the state; it is folded back in at the end.
		uint32_t aa = hash[0];
		uint32_t bb = hash[1];
		uint32_t cc = hash[2];
		uint32_t dd = hash[3];

		// Round 1: dxbcMixF, words in order 0..15.
		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d0  + 0xd76aa478,  7);
		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d1  + 0xe8c7b756, 12);
		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d2  + 0x242070db, 15);
		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d3  + 0xc1bdceee, 10);
		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d4  + 0xf57c0faf,  7);
		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d5  + 0x4787c62a, 12);
		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d6  + 0xa8304613, 15);
		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d7  + 0xfd469501, 10);
		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d8  + 0x698098d8,  7);
		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d9  + 0x8b44f7af, 12);
		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d10 + 0xffff5bb1, 15);
		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d11 + 0x895cd7be, 10);
		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d12 + 0x6b901122,  7);
		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d13 + 0xfd987193, 12);
		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d14 + 0xa679438e, 15);
		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d15 + 0x49b40821, 10);

		// Round 2: dxbcMixG, word index starts at 1 and advances by 5 (mod 16).
		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d1  + 0xf61e2562,  5);
		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d6  + 0xc040b340,  9);
		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d11 + 0x265e5a51, 14);
		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d0  + 0xe9b6c7aa, 12);
		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d5  + 0xd62f105d,  5);
		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d10 + 0x02441453,  9);
		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d15 + 0xd8a1e681, 14);
		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d4  + 0xe7d3fbc8, 12);
		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d9  + 0x21e1cde6,  5);
		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d14 + 0xc33707d6,  9);
		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d3  + 0xf4d50d87, 14);
		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d8  + 0x455a14ed, 12);
		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d13 + 0xa9e3e905,  5);
		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d2  + 0xfcefa3f8,  9);
		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d7  + 0x676f02d9, 14);
		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d12 + 0x8d2a4c8a, 12);

		// Round 3: dxbcMixH, word index starts at 5 and advances by 3 (mod 16).
		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d5  + 0xfffa3942,  4);
		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d8  + 0x8771f681, 11);
		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d11 + 0x6d9d6122, 16);
		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d14 + 0xfde5380c,  9);
		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d1  + 0xa4beea44,  4);
		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d4  + 0x4bdecfa9, 11);
		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d7  + 0xf6bb4b60, 16);
		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d10 + 0xbebfbc70,  9);
		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d13 + 0x289b7ec6,  4);
		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d0  + 0xeaa127fa, 11);
		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d3  + 0xd4ef3085, 16);
		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d6  + 0x04881d05,  9);
		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d9  + 0xd9d4d039,  4);
		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d12 + 0xe6db99e5, 11);
		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d15 + 0x1fa27cf8, 16);
		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d2  + 0xc4ac5665,  9);

		// Round 4: dxbcMixI, word index starts at 0 and advances by 7 (mod 16).
		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d0  + 0xf4292244,  6);
		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d7  + 0x432aff97, 10);
		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d14 + 0xab9423a7, 15);
		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d5  + 0xfc93a039, 11);
		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d12 + 0x655b59c3,  6);
		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d3  + 0x8f0ccc92, 10);
		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d10 + 0xffeff47d, 15);
		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d1  + 0x85845dd1, 11);
		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d8  + 0x6fa87e4f,  6);
		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d15 + 0xfe2ce6e0, 10);
		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d6  + 0xa3014314, 15);
		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d13 + 0x4e0811a1, 11);
		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d4  + 0xf7537e82,  6);
		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d11 + 0xbd3af235, 10);
		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d2  + 0x2ad7d2bb, 15);
		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d9  + 0xeb86d391, 11);

		// Fold the mixed values back into the running state.
		hash[0] += aa;
		hash[1] += bb;
		hash[2] += cc;
		hash[3] += dd;
	}
718 
719 	// dxbc hash function is slightly modified version of MD5 hash.
720 	// https://web.archive.org/web/20190207230524/https://tools.ietf.org/html/rfc1321
721 	// https://web.archive.org/web/20190207230538/http://www.efgh.com/software/md5.txt
722 	//
723 	// Assumption is that data pointer, size are both 4-byte aligned,
724 	// and little endian.
725 	//
dxbcHash(const void * _data,uint32_t _size,void * _digest)726 	void dxbcHash(const void* _data, uint32_t _size, void* _digest)
727 	{
728 		uint32_t hash[4] =
729 		{
730 			0x67452301,
731 			0xefcdab89,
732 			0x98badcfe,
733 			0x10325476,
734 		};
735 
736 		const uint32_t* data = (const uint32_t*)_data;
737 		for (uint32_t ii = 0, num = _size/64; ii < num; ++ii)
738 		{
739 			dxbcHashBlock(data, hash);
740 			data += 16;
741 		}
742 
743 		uint32_t last[16];
744 		bx::memSet(last, 0, sizeof(last) );
745 
746 		const uint32_t remaining = _size & 0x3f;
747 
748 		if (remaining >= 56)
749 		{
750 			bx::memCopy(&last[0], data, remaining);
751 			last[remaining/4] = 0x80;
752 			dxbcHashBlock(last, hash);
753 
754 			bx::memSet(&last[1], 0, 56);
755 		}
756 		else
757 		{
758 			bx::memCopy(&last[1], data, remaining);
759 			last[1 + remaining/4] = 0x80;
760 		}
761 
762 		last[ 0] = _size * 8;
763 		last[15] = _size * 2 + 1;
764 		dxbcHashBlock(last, hash);
765 
766 		bx::memCopy(_digest, hash, 16);
767 	}
768 
read(bx::ReaderI * _reader,DxbcSubOperand & _subOperand,bx::Error * _err)769 	int32_t read(bx::ReaderI* _reader, DxbcSubOperand& _subOperand, bx::Error* _err)
770 	{
771 		uint32_t token;
772 		int32_t size = 0;
773 
774 		// 0       1       2       3
775 		// 76543210765432107654321076543210
776 		// e222111000nnttttttttssssssssmmoo
777 		// ^^  ^  ^  ^ ^       ^       ^ ^-- number of operands
778 		// ||  |  |  | |       |       +---- operand mode
779 		// ||  |  |  | |       +------------ operand mode bits
780 		// ||  |  |  | +-------------------- type
781 		// ||  |  |  +---------------------- number of addressing modes
782 		// ||  |  +------------------------- addressing mode 0
783 		// ||  +---------------------------- addressing mode 1
784 		// |+------------------------------- addressing mode 2
785 		// +-------------------------------- extended
786 
787 		size += bx::read(_reader, token, _err);
788 		_subOperand.type         = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12);
789 		_subOperand.numAddrModes =               uint8_t( (token & UINT32_C(0x00300000) ) >> 20);
790 		_subOperand.addrMode     =               uint8_t( (token & UINT32_C(0x01c00000) ) >> 22);
791 		_subOperand.mode         = DxbcOperandMode::Enum( (token & UINT32_C(0x0000000c) ) >>  2);
792 		_subOperand.modeBits     =               uint8_t( (token & UINT32_C(0x00000ff0) ) >>  4) & "\x0f\xff\x03\x00"[_subOperand.mode];
793 		_subOperand.num          =               uint8_t( (token & UINT32_C(0x00000003) )      );
794 
795 		switch (_subOperand.addrMode)
796 		{
797 		case DxbcOperandAddrMode::Imm32:
798 			size += bx::read(_reader, _subOperand.regIndex, _err);
799 			break;
800 
801 		case DxbcOperandAddrMode::Reg:
802 			{
803 				DxbcSubOperand subOperand;
804 				size += read(_reader, subOperand, _err);
805 			}
806 			break;
807 
808 		case DxbcOperandAddrMode::RegImm32:
809 			{
810 				size += bx::read(_reader, _subOperand.regIndex, _err);
811 
812 				DxbcSubOperand subOperand;
813 				size += read(_reader, subOperand, _err);
814 			}
815 			break;
816 
817 		case DxbcOperandAddrMode::RegImm64:
818 			{
819 				size += bx::read(_reader, _subOperand.regIndex, _err);
820 				size += bx::read(_reader, _subOperand.regIndex, _err);
821 
822 				DxbcSubOperand subOperand;
823 				size += read(_reader, subOperand, _err);
824 			}
825 			break;
826 
827 		default:
828 			BX_CHECK(false, "sub operand addressing mode %d", _subOperand.addrMode);
829 			break;
830 		}
831 
832 		return size;
833 	}
834 
write(bx::WriterI * _writer,const DxbcSubOperand & _subOperand,bx::Error * _err)835 	int32_t write(bx::WriterI* _writer, const DxbcSubOperand& _subOperand, bx::Error* _err)
836 	{
837 		int32_t size = 0;
838 
839 		uint32_t token = 0;
840 		token |= (_subOperand.type         << 12) & UINT32_C(0x000ff000);
841 		token |= (_subOperand.numAddrModes << 20) & UINT32_C(0x00300000);
842 		token |= (_subOperand.addrMode     << 22) & UINT32_C(0x01c00000);
843 		token |= (_subOperand.mode         <<  2) & UINT32_C(0x0000000c);
844 		token |= (_subOperand.modeBits     <<  4) & UINT32_C(0x00000ff0);
845 		token |=  _subOperand.num                 & UINT32_C(0x00000003);
846 		size += bx::write(_writer, token, _err);
847 
848 		switch (_subOperand.addrMode)
849 		{
850 		case DxbcOperandAddrMode::Imm32:
851 			size += bx::write(_writer, _subOperand.regIndex, _err);
852 			break;
853 
854 		case DxbcOperandAddrMode::Reg:
855 			{
856 				DxbcSubOperand subOperand;
857 				size += write(_writer, subOperand, _err);
858 			}
859 			break;
860 
861 		case DxbcOperandAddrMode::RegImm32:
862 			{
863 				size += bx::write(_writer, _subOperand.regIndex, _err);
864 
865 				DxbcSubOperand subOperand;
866 				size += write(_writer, subOperand, _err);
867 			}
868 			break;
869 
870 		case DxbcOperandAddrMode::RegImm64:
871 			{
872 				size += bx::write(_writer, _subOperand.regIndex, _err);
873 				size += bx::write(_writer, _subOperand.regIndex, _err);
874 
875 				DxbcSubOperand subOperand;
876 				size += write(_writer, subOperand, _err);
877 			}
878 			break;
879 
880 		default:
881 			BX_CHECK(false, "sub operand addressing mode %d", _subOperand.addrMode);
882 			break;
883 		}
884 
885 		return size;
886 	}
887 
read(bx::ReaderI * _reader,DxbcOperand & _operand,bx::Error * _err)888 	int32_t read(bx::ReaderI* _reader, DxbcOperand& _operand, bx::Error* _err)
889 	{
890 		int32_t size = 0;
891 
892 		uint32_t token;
893 		size += bx::read(_reader, token, _err);
894 
895 		// 0       1       2       3
896 		// 76543210765432107654321076543210
897 		// e222111000nnttttttttssssssssmmoo
898 		// ^^  ^  ^  ^ ^       ^       ^ ^-- number of operands
899 		// ||  |  |  | |       |       +---- operand mode
900 		// ||  |  |  | |       +------------ operand mode bits
901 		// ||  |  |  | +-------------------- type
902 		// ||  |  |  +---------------------- number of addressing modes
903 		// ||  |  +------------------------- addressing mode 0
904 		// ||  +---------------------------- addressing mode 1
905 		// |+------------------------------- addressing mode 2
906 		// +-------------------------------- extended
907 
908 		_operand.numAddrModes =               uint8_t( (token & UINT32_C(0x00300000) ) >> 20);
909 		_operand.addrMode[0]  =               uint8_t( (token & UINT32_C(0x01c00000) ) >> 22);
910 		_operand.addrMode[1]  =               uint8_t( (token & UINT32_C(0x0e000000) ) >> 25);
911 		_operand.addrMode[2]  =               uint8_t( (token & UINT32_C(0x70000000) ) >> 28);
912 		_operand.type         = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12);
913 		_operand.mode         = DxbcOperandMode::Enum( (token & UINT32_C(0x0000000c) ) >>  2);
914 		_operand.modeBits     =               uint8_t( (token & UINT32_C(0x00000ff0) ) >>  4) & "\x0f\xff\x03\x00"[_operand.mode];
915 		_operand.num          =               uint8_t( (token & UINT32_C(0x00000003) )      );
916 
917 		const bool extended = 0 != (token & UINT32_C(0x80000000) );
918 		if (extended)
919 		{
920 			uint32_t extBits = 0;
921 			size += bx::read(_reader, extBits, _err);
922 
923 			_operand.modifier = DxbcOperandModifier::Enum( (extBits & UINT32_C(0x00003fc0) ) >> 6);
924 		}
925 		else
926 		{
927 			_operand.modifier = DxbcOperandModifier::None;
928 		}
929 
930 		switch (_operand.type)
931 		{
932 		case DxbcOperandType::Imm32:
933 			_operand.num = 2 == _operand.num ? 4 : _operand.num;
934 			for (uint32_t ii = 0; ii < _operand.num; ++ii)
935 			{
936 				size += bx::read(_reader, _operand.un.imm32[ii], _err);
937 			}
938 			break;
939 
940 		case DxbcOperandType::Imm64:
941 			_operand.num = 2 == _operand.num ? 4 : _operand.num;
942 			for (uint32_t ii = 0; ii < _operand.num; ++ii)
943 			{
944 				size += bx::read(_reader, _operand.un.imm64[ii], _err);
945 			}
946 			break;
947 
948 		default:
949 			break;
950 		}
951 
952 		for (uint32_t ii = 0; ii < _operand.numAddrModes; ++ii)
953 		{
954 			switch (_operand.addrMode[ii])
955 			{
956 			case DxbcOperandAddrMode::Imm32:
957 				size += bx::read(_reader, _operand.regIndex[ii], _err);
958 				break;
959 
960 			case DxbcOperandAddrMode::Reg:
961 				size += read(_reader, _operand.subOperand[ii], _err);
962 				break;
963 
964 			case DxbcOperandAddrMode::RegImm32:
965 				size += bx::read(_reader, _operand.regIndex[ii], _err);
966 				size += read(_reader, _operand.subOperand[ii], _err);
967 				break;
968 
969 			default:
970 				BX_CHECK(false, "operand %d addressing mode %d", ii, _operand.addrMode[ii]);
971 				break;
972 			}
973 		}
974 
975 		return size;
976 	}
977 
write(bx::WriterI * _writer,const DxbcOperand & _operand,bx::Error * _err)978 	int32_t write(bx::WriterI* _writer, const DxbcOperand& _operand, bx::Error* _err)
979 	{
980 		int32_t size = 0;
981 
982 		const bool extended = _operand.modifier != DxbcOperandModifier::None;
983 
984 		uint32_t token = 0;
985 		token |=  extended                     ? UINT32_C(0x80000000) : 0;
986 		token |= (_operand.numAddrModes << 20) & UINT32_C(0x00300000);
987 		token |= (_operand.addrMode[0]  << 22) & UINT32_C(0x01c00000);
988 		token |= (_operand.addrMode[1]  << 25) & UINT32_C(0x0e000000);
989 		token |= (_operand.addrMode[2]  << 28) & UINT32_C(0x70000000);
990 		token |= (_operand.type         << 12) & UINT32_C(0x000ff000);
991 		token |= (_operand.mode         <<  2) & UINT32_C(0x0000000c);
992 
993 		token |= (4 == _operand.num ? 2 : _operand.num) & UINT32_C(0x00000003);
994 		token |= ( (_operand.modeBits & "\x0f\xff\x03\x00"[_operand.mode]) << 4) & UINT32_C(0x00000ff0);
995 
996 		size += bx::write(_writer, token, _err);
997 
998 		if (extended)
999 		{
1000 			uint32_t extBits = 0
1001 				| ( (_operand.modifier << 6) & UINT32_C(0x00003fc0) )
1002 				| 1 /* 1 == has extended operand modifier */
1003 				;
1004 			size += bx::write(_writer, extBits, _err);
1005 		}
1006 
1007 		switch (_operand.type)
1008 		{
1009 		case DxbcOperandType::Imm32:
1010 			for (uint32_t ii = 0; ii < _operand.num; ++ii)
1011 			{
1012 				size += bx::write(_writer, _operand.un.imm32[ii], _err);
1013 			}
1014 			break;
1015 
1016 		case DxbcOperandType::Imm64:
1017 			for (uint32_t ii = 0; ii < _operand.num; ++ii)
1018 			{
1019 				size += bx::write(_writer, _operand.un.imm64[ii], _err);
1020 			}
1021 			break;
1022 
1023 		default:
1024 			break;
1025 		}
1026 
1027 		for (uint32_t ii = 0, num = bx::uint32_min(_operand.numAddrModes, BX_COUNTOF(_operand.addrMode) ); ii < num; ++ii)
1028 		{
1029 			switch (_operand.addrMode[ii])
1030 			{
1031 			case DxbcOperandAddrMode::Imm32:
1032 				size += bx::write(_writer, _operand.regIndex[ii], _err);
1033 				break;
1034 
1035 			case DxbcOperandAddrMode::Reg:
1036 				size += write(_writer, _operand.subOperand[ii], _err);
1037 				break;
1038 
1039 			case DxbcOperandAddrMode::RegImm32:
1040 				size += bx::write(_writer, _operand.regIndex[ii], _err);
1041 				size += write(_writer, _operand.subOperand[ii], _err);
1042 				break;
1043 
1044 			default:
1045 				BX_CHECK(false, "operand %d addressing mode %d", ii, _operand.addrMode[ii]);
1046 				break;
1047 			}
1048 		}
1049 
1050 		return size;
1051 	}
1052 
read(bx::ReaderI * _reader,DxbcInstruction & _instruction,bx::Error * _err)1053 	int32_t read(bx::ReaderI* _reader, DxbcInstruction& _instruction, bx::Error* _err)
1054 	{
1055 		int32_t size = 0;
1056 
1057 		uint32_t token;
1058 		size += bx::read(_reader, token, _err);
1059 
1060 		// 0       1       2       3
1061 		// 76543210765432107654321076543210
1062 		// elllllll.............ooooooooooo
1063 		// ^^                   ^----------- opcode
1064 		// |+------------------------------- length
1065 		// +-------------------------------- extended
1066 
1067 		_instruction.opcode = DxbcOpcode::Enum( (token & UINT32_C(0x000007ff) )      );
1068 		_instruction.length =          uint8_t( (token & UINT32_C(0x7f000000) ) >> 24);
1069 		bool extended       =              0 != (token & UINT32_C(0x80000000) );
1070 
1071 		_instruction.srv     = DxbcResourceDim::Unknown;
1072 		_instruction.samples = 0;
1073 
1074 		_instruction.shadow = false;
1075 		_instruction.mono   = false;
1076 
1077 		_instruction.allowRefactoring = false;
1078 		_instruction.fp64             = false;
1079 		_instruction.earlyDepth       = false;
1080 		_instruction.enableBuffers    = false;
1081 		_instruction.skipOptimization = false;
1082 		_instruction.enableMinPrecision     = false;
1083 		_instruction.enableDoubleExtensions = false;
1084 		_instruction.enableShaderExtensions = false;
1085 
1086 		_instruction.threadsInGroup = false;
1087 		_instruction.sharedMemory   = false;
1088 		_instruction.uavGroup       = false;
1089 		_instruction.uavGlobal      = false;
1090 
1091 		_instruction.saturate = false;
1092 		_instruction.testNZ   = false;
1093 		_instruction.retType  = DxbcResourceReturnType::Unused;
1094 
1095 		_instruction.customDataClass = DxbcCustomDataClass::Comment;
1096 		_instruction.customData.clear();
1097 
1098 		switch (_instruction.opcode)
1099 		{
1100 			case DxbcOpcode::CUSTOMDATA:
1101 				{
1102 					_instruction.customDataClass = DxbcCustomDataClass::Enum( (token & UINT32_C(0xfffff800) ) >> 11);
1103 
1104 					_instruction.numOperands = 0;
1105 					size += bx::read(_reader, _instruction.length, _err);
1106 					for (uint32_t ii = 0, num = (_instruction.length-2); ii < num && _err->isOk(); ++ii)
1107 					{
1108 						uint32_t temp;
1109 						size += bx::read(_reader, temp, _err);
1110 						if (_err->isOk() )
1111 						{
1112 							_instruction.customData.push_back(temp);
1113 						}
1114 					}
1115 				}
1116 				return size;
1117 
1118 			case DxbcOpcode::DCL_CONSTANT_BUFFER:
1119 				// 0       1       2       3
1120 				// 76543210765432107654321076543210
1121 				// ........            a...........
1122 				//                     ^------------ Allow refactoring
1123 
1124 				_instruction.allowRefactoring = 0 != (token & UINT32_C(0x00000800) );
1125 				break;
1126 
1127 			case DxbcOpcode::DCL_GLOBAL_FLAGS:
1128 				// 0       1       2       3
1129 				// 76543210765432107654321076543210
1130 				// ........     sxmoudfa...........
1131 				//              ^^^^^^^^------------ Allow refactoring
1132 				//              ||||||+------------- FP64
1133 				//              |||||+-------------- Force early depth/stencil
1134 				//              ||||+--------------- Enable raw and structured buffers
1135 				//              |||+---------------- Skip optimizations
1136 				//              ||+----------------- Enable minimum precision
1137 				//              |+------------------ Enable double extension
1138 				//              +------------------- Enable shader extension
1139 
1140 				_instruction.allowRefactoring       = 0 != (token & UINT32_C(0x00000800) );
1141 				_instruction.fp64                   = 0 != (token & UINT32_C(0x00001000) );
1142 				_instruction.earlyDepth             = 0 != (token & UINT32_C(0x00002000) );
1143 				_instruction.enableBuffers          = 0 != (token & UINT32_C(0x00004000) );
1144 				_instruction.skipOptimization       = 0 != (token & UINT32_C(0x00008000) );
1145 				_instruction.enableMinPrecision     = 0 != (token & UINT32_C(0x00010000) );
1146 				_instruction.enableDoubleExtensions = 0 != (token & UINT32_C(0x00020000) );
1147 				_instruction.enableShaderExtensions = 0 != (token & UINT32_C(0x00040000) );
1148 				break;
1149 
1150 			case DxbcOpcode::DCL_INPUT_PS:
1151 				// 0       1       2       3
1152 				// 76543210765432107654321076543210
1153 				// ........        iiiii...........
1154 				//                 ^---------------- Interploation
1155 
1156 				_instruction.interpolation = DxbcInterpolation::Enum( (token & UINT32_C(0x0000f800) ) >> 11);
1157 				break;
1158 
1159 			case DxbcOpcode::DCL_RESOURCE:
1160 				// 0       1       2       3
1161 				// 76543210765432107654321076543210
1162 				// ........ sssssssrrrrr...........
1163 				//          ^      ^---------------- SRV
1164 				//          +----------------------- MSAA samples
1165 
1166 				_instruction.srv     = DxbcResourceDim::Enum( (token & UINT32_C(0x0000f800) ) >> 11);
1167 				_instruction.samples =               uint8_t( (token & UINT32_C(0x007f0000) ) >> 16);
1168 				break;
1169 
1170 			case DxbcOpcode::DCL_SAMPLER:
1171 				// 0       1       2       3
1172 				// 76543210765432107654321076543210
1173 				// ........           ms...........
1174 				//                    ^^------------ Shadow sampler
1175 				//                    +------------- Mono
1176 
1177 				_instruction.shadow = 0 != (token & UINT32_C(0x00000800) );
1178 				_instruction.mono   = 0 != (token & UINT32_C(0x00001000) );
1179 				break;
1180 
1181 			case DxbcOpcode::SYNC:
1182 				// 0       1       2       3
1183 				// 76543210765432107654321076543210
1184 				// ........         gust...........
1185 				//                  ^^^^------------ Threads in group
1186 				//                  ||+------------- Shared memory
1187 				//                  |+-------------- UAV group
1188 				//                  +--------------- UAV global
1189 
1190 				_instruction.threadsInGroup = 0 != (token & UINT32_C(0x00000800) );
1191 				_instruction.sharedMemory   = 0 != (token & UINT32_C(0x00001000) );
1192 				_instruction.uavGroup       = 0 != (token & UINT32_C(0x00002000) );
1193 				_instruction.uavGlobal      = 0 != (token & UINT32_C(0x00004000) );
1194 				break;
1195 
1196 			default:
1197 				// 0       1       2       3
1198 				// 76543210765432107654321076543210
1199 				// ........ ppppn    stt...........
1200 				//          ^   ^    ^^------------- Resource info return type
1201 				//          |   |    +-------------- Saturate
1202 				//          |   +------------------- Test not zero
1203 				//          +----------------------- Precise mask
1204 
1205 				_instruction.retType  = DxbcResourceReturnType::Enum( (token & UINT32_C(0x00001800) ) >> 11);
1206 				_instruction.saturate =                          0 != (token & UINT32_C(0x00002000) );
1207 				_instruction.testNZ   =                          0 != (token & UINT32_C(0x00040000) );
1208 //				_instruction.precise  =              uint8_t( (token & UINT32_C(0x00780000) ) >> 19);
1209 				break;
1210 		}
1211 
1212 		_instruction.extended[0] = DxbcInstruction::ExtendedType::Count;
1213 		for (uint32_t ii = 0; extended; ++ii)
1214 		{
1215 			// 0       1       2       3
1216 			// 76543210765432107654321076543210
1217 			// e..........................ttttt
1218 			// ^                          ^
1219 			// |                          +----- type
1220 			// +-------------------------------- extended
1221 
1222 			uint32_t extBits;
1223 			size += bx::read(_reader, extBits, _err);
1224 			extended = 0 != (extBits & UINT32_C(0x80000000) );
1225 			_instruction.extended[ii  ] = DxbcInstruction::ExtendedType::Enum(extBits & UINT32_C(0x0000001f) );
1226 			_instruction.extended[ii+1] = DxbcInstruction::ExtendedType::Count;
1227 
1228 			switch (_instruction.extended[ii])
1229 			{
1230 			case DxbcInstruction::ExtendedType::SampleControls:
1231 				// 0       1       2       3
1232 				// 76543210765432107654321076543210
1233 				// .          zzzzyyyyxxxx    .....
1234 				//            ^   ^   ^
1235 				//            |   |   +------------- x
1236 				//            |   +----------------- y
1237 				//            +--------------------- z
1238 
1239 				_instruction.sampleOffsets[0] = uint8_t( (extBits & UINT32_C(0x00001e00) ) >>  9);
1240 				_instruction.sampleOffsets[1] = uint8_t( (extBits & UINT32_C(0x0001e000) ) >> 13);
1241 				_instruction.sampleOffsets[2] = uint8_t( (extBits & UINT32_C(0x001e0000) ) >> 17);
1242 				break;
1243 
1244 			case DxbcInstruction::ExtendedType::ResourceDim:
1245 				// 0       1       2       3
1246 				// 76543210765432107654321076543210
1247 				// .                          .....
1248 				//
1249 
1250 				_instruction.resourceTarget = uint8_t( (extBits & UINT32_C(0x000003e0) ) >>  6);
1251 				_instruction.resourceStride = uint8_t( (extBits & UINT32_C(0x0000f800) ) >> 11);
1252 				break;
1253 
1254 			case DxbcInstruction::ExtendedType::ResourceReturnType:
1255 				// 0       1       2       3
1256 				// 76543210765432107654321076543210
1257 				// .          3333222211110000.....
1258 				//            ^   ^   ^
1259 				//            |   |   +------------- x
1260 				//            |   +----------------- y
1261 				//            +--------------------- z
1262 
1263 				_instruction.resourceReturnTypes[0] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x000001e0) ) >>   6);
1264 				_instruction.resourceReturnTypes[1] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x00001e00) ) >>   9);
1265 				_instruction.resourceReturnTypes[2] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x0001e000) ) >>  13);
1266 				_instruction.resourceReturnTypes[3] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x001e0000) ) >>  17);
1267 				break;
1268 
1269 			default:
1270 				break;
1271 			}
1272 		}
1273 
1274 		switch (_instruction.opcode)
1275 		{
1276 			case DxbcOpcode::DCL_FUNCTION_TABLE:
1277 				{
1278 					uint32_t tableId;
1279 					size += read(_reader, tableId, _err);
1280 
1281 					uint32_t num;
1282 					size += read(_reader, num);
1283 
1284 					for (uint32_t ii = 0; ii < num; ++ii)
1285 					{
1286 						uint32_t bodyId;
1287 						size += read(_reader, bodyId, _err);
1288 					}
1289 				}
1290 				break;
1291 
1292 			case DxbcOpcode::DCL_INTERFACE:
1293 				{
1294 					uint32_t interfaceId;
1295 					size += read(_reader, interfaceId, _err);
1296 
1297 					uint32_t num;
1298 					size += read(_reader, num, _err);
1299 
1300 					BX_CHECK(false, "not implemented.");
1301 				}
1302 				break;
1303 
1304 			default:
1305 				break;
1306 		};
1307 
1308 		uint32_t currOp = 0;
1309 
1310 		const DxbcOpcodeInfo& info = s_dxbcOpcodeInfo[_instruction.opcode];
1311 		_instruction.numOperands = info.numOperands;
1312 		switch (info.numOperands)
1313 		{
1314 		case 6: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH;
1315 		case 5: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH;
1316 		case 4: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH;
1317 		case 3: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH;
1318 		case 2: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH;
1319 		case 1: size += read(_reader, _instruction.operand[currOp++], _err); BX_FALLTHROUGH;
1320 		case 0:
1321 			if (0 < info.numValues)
1322 			{
1323 				size += read(_reader, _instruction.value, info.numValues*sizeof(uint32_t), _err);
1324 			}
1325 			break;
1326 
1327 		default:
1328 			BX_CHECK(false, "Instruction %s with invalid number of operands %d (numValues %d)."
1329 					, getName(_instruction.opcode)
1330 					, info.numOperands
1331 					, info.numValues
1332 					);
1333 			break;
1334 		}
1335 
1336 		return size;
1337 	}
1338 
write(bx::WriterI * _writer,const DxbcInstruction & _instruction,bx::Error * _err)1339 	int32_t write(bx::WriterI* _writer, const DxbcInstruction& _instruction, bx::Error* _err)
1340 	{
1341 		uint32_t token = 0;
1342 		token |= (_instruction.opcode        ) & UINT32_C(0x000007ff);
1343 		token |= (_instruction.length   << 24) & UINT32_C(0x7f000000);
1344 
1345 		token |=  DxbcInstruction::ExtendedType::Count != _instruction.extended[0]
1346 			? UINT32_C(0x80000000)
1347 			: 0
1348 			;
1349 
1350 		int32_t size =0;
1351 
1352 		switch (_instruction.opcode)
1353 		{
1354 			case DxbcOpcode::CUSTOMDATA:
1355 				{
1356 					token &= UINT32_C(0x000007ff);
1357 					token |= _instruction.customDataClass << 11;
1358 
1359 					size += bx::write(_writer, token);
1360 
1361 					uint32_t len = uint32_t(_instruction.customData.size()*sizeof(uint32_t) );
1362 					size += bx::write(_writer, len/4+2, _err);
1363 					size += bx::write(_writer, _instruction.customData.data(), len, _err);
1364 				}
1365 				return size;
1366 
1367 			case DxbcOpcode::DCL_CONSTANT_BUFFER:
1368 				token |= _instruction.allowRefactoring ? UINT32_C(0x00000800) : 0;
1369 				break;
1370 
1371 			case DxbcOpcode::DCL_GLOBAL_FLAGS:
1372 				token |= _instruction.allowRefactoring       ? UINT32_C(0x00000800) : 0;
1373 				token |= _instruction.fp64                   ? UINT32_C(0x00001000) : 0;
1374 				token |= _instruction.earlyDepth             ? UINT32_C(0x00002000) : 0;
1375 				token |= _instruction.enableBuffers          ? UINT32_C(0x00004000) : 0;
1376 				token |= _instruction.skipOptimization       ? UINT32_C(0x00008000) : 0;
1377 				token |= _instruction.enableMinPrecision     ? UINT32_C(0x00010000) : 0;
1378 				token |= _instruction.enableDoubleExtensions ? UINT32_C(0x00020000) : 0;
1379 				token |= _instruction.enableShaderExtensions ? UINT32_C(0x00040000) : 0;
1380 				break;
1381 
1382 			case DxbcOpcode::DCL_INPUT_PS:
1383 				token |= (_instruction.interpolation << 11) & UINT32_C(0x0000f800);
1384 				break;
1385 
1386 			case DxbcOpcode::DCL_RESOURCE:
1387 				token |= (_instruction.srv     << 11) & UINT32_C(0x0000f800);
1388 				token |= (_instruction.samples << 16) & UINT32_C(0x007f0000);
1389 				break;
1390 
1391 			case DxbcOpcode::DCL_SAMPLER:
1392 				token |= _instruction.shadow ? (0x00000800) : 0;
1393 				token |= _instruction.mono   ? (0x00001000) : 0;
1394 				break;
1395 
1396 			case DxbcOpcode::SYNC:
1397 				token |= _instruction.threadsInGroup ? UINT32_C(0x00000800) : 0;
1398 				token |= _instruction.sharedMemory   ? UINT32_C(0x00001000) : 0;
1399 				token |= _instruction.uavGroup       ? UINT32_C(0x00002000) : 0;
1400 				token |= _instruction.uavGlobal      ? UINT32_C(0x00004000) : 0;
1401 				break;
1402 
1403 			default:
1404 				token |= (_instruction.retType << 11) & UINT32_C(0x00001800);
1405 				token |=  _instruction.saturate ? UINT32_C(0x00002000) : 0;
1406 				token |=  _instruction.testNZ   ? UINT32_C(0x00040000) : 0;
1407 //				_instruction.precise  =              uint8_t( (token & UINT32_C(0x00780000) ) >> 19);
1408 				break;
1409 		}
1410 
1411 		size += bx::write(_writer, token);
1412 
1413 		for (uint32_t ii = 0; _instruction.extended[ii] != DxbcInstruction::ExtendedType::Count; ++ii)
1414 		{
1415 			// 0       1       2       3
1416 			// 76543210765432107654321076543210
1417 			// e..........................ttttt
1418 			// ^                          ^
1419 			// |                          +----- type
1420 			// +-------------------------------- extended
1421 
1422 			token = _instruction.extended[ii+1] == DxbcInstruction::ExtendedType::Count
1423 				? 0
1424 				: UINT32_C(0x80000000)
1425 				;
1426 			token |= uint8_t(_instruction.extended[ii]);
1427 
1428 			switch (_instruction.extended[ii])
1429 			{
1430 			case DxbcInstruction::ExtendedType::SampleControls:
1431 				// 0       1       2       3
1432 				// 76543210765432107654321076543210
1433 				// .          zzzzyyyyxxxx    .....
1434 				//            ^   ^   ^
1435 				//            |   |   +------------- x
1436 				//            |   +----------------- y
1437 				//            +--------------------- z
1438 
1439 				token |= (uint32_t(_instruction.sampleOffsets[0]) <<  9) & UINT32_C(0x00001e00);
1440 				token |= (uint32_t(_instruction.sampleOffsets[1]) << 13) & UINT32_C(0x0001e000);
1441 				token |= (uint32_t(_instruction.sampleOffsets[2]) << 17) & UINT32_C(0x001e0000);
1442 				break;
1443 
1444 			case DxbcInstruction::ExtendedType::ResourceDim:
1445 				// 0       1       2       3
1446 				// 76543210765432107654321076543210
1447 				// .                          .....
1448 				//
1449 
1450 				token |= (uint32_t(_instruction.resourceTarget <<  6) & UINT32_C(0x000003e0) );
1451 				token |= (uint32_t(_instruction.resourceStride << 11) & UINT32_C(0x0000f800) );
1452 				break;
1453 
1454 			case DxbcInstruction::ExtendedType::ResourceReturnType:
1455 				// 0       1       2       3
1456 				// 76543210765432107654321076543210
1457 				// .          3333222211110000.....
1458 				//            ^   ^   ^
1459 				//            |   |   +------------- x
1460 				//            |   +----------------- y
1461 				//            +--------------------- z
1462 
1463 				token |= (uint32_t(_instruction.resourceReturnTypes[0]) <<  6) & UINT32_C(0x000001e0);
1464 				token |= (uint32_t(_instruction.resourceReturnTypes[1]) <<  9) & UINT32_C(0x00001e00);
1465 				token |= (uint32_t(_instruction.resourceReturnTypes[2]) << 13) & UINT32_C(0x0001e000);
1466 				token |= (uint32_t(_instruction.resourceReturnTypes[3]) << 17) & UINT32_C(0x001e0000);
1467 				break;
1468 
1469 			default:
1470 				break;
1471 			}
1472 
1473 			size += bx::write(_writer, token, _err);
1474 		}
1475 
1476 		for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii)
1477 		{
1478 			size += write(_writer, _instruction.operand[ii], _err);
1479 		}
1480 
1481 		const DxbcOpcodeInfo& info = s_dxbcOpcodeInfo[_instruction.opcode];
1482 		if (0 < info.numValues)
1483 		{
1484 			size += bx::write(_writer, _instruction.value, info.numValues*sizeof(uint32_t), _err);
1485 		}
1486 
1487 		return size;
1488 	}
1489 
toString(char * _out,int32_t _size,DxbcOperandMode::Enum _mode,uint8_t _modeBits)1490 	int32_t toString(char* _out, int32_t _size, DxbcOperandMode::Enum _mode, uint8_t _modeBits)
1491 	{
1492 		int32_t size = 0;
1493 
1494 		switch (_mode)
1495 		{
1496 		case DxbcOperandMode::Mask:
1497 			if (0xf > _modeBits
1498 			&&  0   < _modeBits)
1499 			{
1500 				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1501 							, ".%s%s%s%s"
1502 							, 0 == (_modeBits & 1) ? "" : "x"
1503 							, 0 == (_modeBits & 2) ? "" : "y"
1504 							, 0 == (_modeBits & 4) ? "" : "z"
1505 							, 0 == (_modeBits & 8) ? "" : "w"
1506 							);
1507 			}
1508 			break;
1509 
1510 		case DxbcOperandMode::Swizzle:
1511 			if (0xe4 != _modeBits)
1512 			{
1513 				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1514 							, ".%c%c%c%c"
1515 							, "xyzw"[(_modeBits   )&0x3]
1516 							, "xyzw"[(_modeBits>>2)&0x3]
1517 							, "xyzw"[(_modeBits>>4)&0x3]
1518 							, "xyzw"[(_modeBits>>6)&0x3]
1519 							);
1520 			}
1521 			break;
1522 
1523 		case DxbcOperandMode::Scalar:
1524 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1525 						, ".%c"
1526 						, "xyzw"[_modeBits]
1527 						);
1528 			break;
1529 
1530 		default:
1531 			break;
1532 		}
1533 
1534 		return size;
1535 	}
1536 
toString(char * _out,int32_t _size,const DxbcInstruction & _instruction)1537 	int32_t toString(char* _out, int32_t _size, const DxbcInstruction& _instruction)
1538 	{
1539 		int32_t size = 0;
1540 
1541 		switch (_instruction.opcode)
1542 		{
1543 		case DxbcOpcode::CUSTOMDATA:
1544 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1545 						, "%s"
1546 						, s_dxbcCustomDataClass[_instruction.customDataClass]
1547 						);
1548 			break;
1549 
1550 		case DxbcOpcode::IF:
1551 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1552 						, "%s%s"
1553 						, getName(_instruction.opcode)
1554 						, _instruction.testNZ ? "_nz"  : "_z"
1555 						);
1556 			break;
1557 
1558 		default:
1559 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1560 						, "%s%s%s"
1561 						, getName(_instruction.opcode)
1562 						, _instruction.saturate ? "_sat" : ""
1563 						, _instruction.testNZ   ? "_nz"  : ""
1564 						);
1565 			break;
1566 		}
1567 
1568 		if (DxbcResourceDim::Unknown != _instruction.srv)
1569 		{
1570 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1571 						, " %s<%x>"
1572 						, s_dxbcSrvType[_instruction.srv]
1573 						, _instruction.value[0]
1574 						);
1575 		}
1576 		else if (0 < s_dxbcOpcodeInfo[_instruction.opcode].numValues)
1577 		{
1578 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1579 						, " %d"
1580 						, _instruction.value[0]
1581 						);
1582 		}
1583 
1584 		for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii)
1585 		{
1586 			const DxbcOperand& operand = _instruction.operand[ii];
1587 
1588 			const bool array = false
1589 				|| 1 < operand.numAddrModes
1590 				|| DxbcOperandAddrMode::Imm32 != operand.addrMode[0]
1591 				;
1592 
1593 			const char* preOperand  = "";
1594 			const char* postOperand = "";
1595 
1596 			switch (operand.modifier)
1597 			{
1598 			case DxbcOperandModifier::Neg:    preOperand =     "-"; postOperand =  ""; break;
1599 			case DxbcOperandModifier::Abs:    preOperand =  "abs("; postOperand = ")"; break;
1600 			case DxbcOperandModifier::AbsNeg: preOperand = "-abs("; postOperand = ")"; break;
1601 			default: break;
1602 			}
1603 
1604 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1605 						, "%s%s%s"
1606 						, 0 == ii ? " " : ", "
1607 						, preOperand
1608 						, s_dxbcOperandType[operand.type]
1609 						);
1610 
1611 			switch (operand.type)
1612 			{
1613 			case DxbcOperandType::Imm32:
1614 			case DxbcOperandType::Imm64:
1615 				for (uint32_t jj = 0; jj < operand.num; ++jj)
1616 				{
1617 					union { uint32_t i; float f; } cast = { operand.un.imm32[jj] };
1618 					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1619 								, "%s%f"
1620 								, 0 == jj ? "(" : ", "
1621 								, cast.f
1622 								);
1623 				}
1624 
1625 				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1626 							, ")"
1627 							);
1628 				break;
1629 
1630 			default:
1631 				break;
1632 			}
1633 
1634 			const uint32_t first = false
1635 				|| DxbcOperandType::ImmConstantBuffer == operand.type
1636 				|| DxbcOperandAddrMode::RegImm32      == operand.addrMode[0]
1637 				? 0 : 1
1638 				;
1639 			if (0 == first)
1640 			{
1641 				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1642 							, "["
1643 							);
1644 			}
1645 			else
1646 			{
1647 				size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1648 							, "%d%s"
1649 							, operand.regIndex[0]
1650 							, array ? "[" : ""
1651 							);
1652 			}
1653 
1654 			for (uint32_t jj = first, num = bx::uint32_min(operand.numAddrModes, BX_COUNTOF(operand.addrMode) ); jj < num; ++jj)
1655 			{
1656 				switch (operand.addrMode[jj])
1657 				{
1658 				case DxbcOperandAddrMode::Imm32:
1659 					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1660 								, "%d"
1661 								, operand.regIndex[jj]
1662 								);
1663 					break;
1664 
1665 				case DxbcOperandAddrMode::Reg:
1666 					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1667 								, "%s%d"
1668 								, s_dxbcOperandType[operand.subOperand[jj].type]
1669 								, operand.subOperand[jj].regIndex
1670 								);
1671 					size += toString(&_out[size], bx::uint32_imax(0, _size-size)
1672 								, DxbcOperandMode::Enum(operand.subOperand[jj].mode)
1673 								, operand.subOperand[jj].modeBits
1674 								);
1675 					break;
1676 
1677 				case DxbcOperandAddrMode::RegImm32:
1678 					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1679 								, "%d + %s%d"
1680 								, operand.regIndex[jj]
1681 								, s_dxbcOperandType[operand.subOperand[jj].type]
1682 								, operand.subOperand[jj].regIndex
1683 								);
1684 					size += toString(&_out[size], bx::uint32_imax(0, _size-size)
1685 								, DxbcOperandMode::Enum(operand.subOperand[jj].mode)
1686 								, operand.subOperand[jj].modeBits
1687 								);
1688 					break;
1689 
1690 				default:
1691 					size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size), "???");
1692 					break;
1693 				}
1694 			}
1695 
1696 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1697 						, "%s"
1698 						, array ? "]" : ""
1699 						);
1700 
1701 			size += toString(&_out[size], bx::uint32_imax(0, _size-size), operand.mode, operand.modeBits);
1702 
1703 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1704 						, "%s"
1705 						, postOperand
1706 						);
1707 		}
1708 
1709 		if (_instruction.opcode == DxbcOpcode::DCL_CONSTANT_BUFFER
1710 		&&  _instruction.allowRefactoring)
1711 		{
1712 			size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
1713 						, ", dynamicIndexed"
1714 						);
1715 		}
1716 
1717 		return size;
1718 	}
1719 
read(bx::ReaderSeekerI * _reader,DxbcSignature & _signature,bx::Error * _err)1720 	int32_t read(bx::ReaderSeekerI* _reader, DxbcSignature& _signature, bx::Error* _err)
1721 	{
1722 		int32_t size = 0;
1723 
1724 		int64_t offset = bx::seek(_reader);
1725 
1726 		uint32_t num;
1727 		size += bx::read(_reader, num, _err);
1728 		size += bx::read(_reader, _signature.key, _err);
1729 
1730 		for (uint32_t ii = 0; ii < num; ++ii)
1731 		{
1732 			DxbcSignature::Element element;
1733 
1734 			uint32_t nameOffset;
1735 			size += bx::read(_reader, nameOffset);
1736 
1737 			char name[DXBC_MAX_NAME_STRING];
1738 			readString(_reader, offset + nameOffset, name, DXBC_MAX_NAME_STRING, _err);
1739 			element.name = name;
1740 
1741 			size += bx::read(_reader, element.semanticIndex, _err);
1742 			size += bx::read(_reader, element.valueType, _err);
1743 			size += bx::read(_reader, element.componentType, _err);
1744 			size += bx::read(_reader, element.registerIndex, _err);
1745 			size += bx::read(_reader, element.mask, _err);
1746 			size += bx::read(_reader, element.readWriteMask, _err);
1747 			size += bx::read(_reader, element.stream, _err);
1748 
1749 			// padding
1750 			uint8_t padding;
1751 			size += bx::read(_reader, padding, _err);
1752 
1753 			_signature.elements.push_back(element);
1754 		}
1755 
1756 		return size;
1757 	}
1758 
write(bx::WriterI * _writer,const DxbcSignature & _signature,bx::Error * _err)1759 	int32_t write(bx::WriterI* _writer, const DxbcSignature& _signature, bx::Error* _err)
1760 	{
1761 		int32_t size = 0;
1762 
1763 		const uint32_t num = uint32_t(_signature.elements.size() );
1764 		size += bx::write(_writer, num, _err);
1765 		size += bx::write(_writer, _signature.key, _err);
1766 
1767 		typedef stl::unordered_map<stl::string, uint32_t> NameOffsetMap;
1768 		NameOffsetMap nom;
1769 
1770 		const uint8_t pad = 0;
1771 		uint32_t nameOffset = num * 24 + 8;
1772 		for (uint32_t ii = 0; ii < num; ++ii)
1773 		{
1774 			const DxbcSignature::Element& element = _signature.elements[ii];
1775 
1776 			NameOffsetMap::iterator it = nom.find(element.name);
1777 			if (it == nom.end() )
1778 			{
1779 				nom.insert(stl::make_pair(element.name, nameOffset) );
1780 				size += bx::write(_writer, nameOffset, _err);
1781 				nameOffset += uint32_t(element.name.size() + 1);
1782 			}
1783 			else
1784 			{
1785 				size += bx::write(_writer, it->second);
1786 			}
1787 
1788 			size += bx::write(_writer, element.semanticIndex, _err);
1789 			size += bx::write(_writer, element.valueType, _err);
1790 			size += bx::write(_writer, element.componentType, _err);
1791 			size += bx::write(_writer, element.registerIndex, _err);
1792 			size += bx::write(_writer, element.mask, _err);
1793 			size += bx::write(_writer, element.readWriteMask, _err);
1794 			size += bx::write(_writer, element.stream, _err);
1795 			size += bx::write(_writer, pad, _err);
1796 		}
1797 
1798 		uint32_t len = 0;
1799 		for (uint32_t ii = 0; ii < num; ++ii)
1800 		{
1801 			const DxbcSignature::Element& element = _signature.elements[ii];
1802 			NameOffsetMap::iterator it = nom.find(element.name);
1803 			if (it != nom.end() )
1804 			{
1805 				nom.erase(it);
1806 				size += bx::write(_writer, element.name.c_str(), uint32_t(element.name.size() + 1), _err);
1807 				len  += uint32_t(element.name.size() + 1);
1808 			}
1809 		}
1810 
1811 		// align 4 bytes
1812 		size += bx::writeRep(_writer, 0xab, (len+3)/4*4 - len, _err);
1813 
1814 		return size;
1815 	}
1816 
read(bx::ReaderSeekerI * _reader,DxbcShader & _shader,bx::Error * _err)1817 	int32_t read(bx::ReaderSeekerI* _reader, DxbcShader& _shader, bx::Error* _err)
1818 	{
1819 		int32_t size = 0;
1820 
1821 		size += bx::read(_reader, _shader.version, _err);
1822 
1823 		uint32_t bcLength;
1824 		size += bx::read(_reader, bcLength, _err);
1825 
1826 		uint32_t len = (bcLength-2)*sizeof(uint32_t);
1827 		_shader.byteCode.resize(len);
1828 		size += bx::read(_reader, _shader.byteCode.data(), len, _err);
1829 
1830 		return size;
1831 	}
1832 
write(bx::WriterI * _writer,const DxbcShader & _shader,bx::Error * _err)1833 	int32_t write(bx::WriterI* _writer, const DxbcShader& _shader, bx::Error* _err)
1834 	{
1835 		const uint32_t len = uint32_t(_shader.byteCode.size() );
1836 		const uint32_t bcLength = len / sizeof(uint32_t) + 2;
1837 
1838 		int32_t size = 0;
1839 		size += bx::write(_writer, _shader.version, _err);
1840 		size += bx::write(_writer, bcLength, _err);
1841 		size += bx::write(_writer, _shader.byteCode.data(), len, _err);
1842 
1843 		return size;
1844 	}
1845 
// FourCC tags of the shader byte code chunk. The container reader below sets
// `shader.shex` when it encounters the SHEX variant.
#define DXBC_CHUNK_SHADER           BX_MAKEFOURCC('S', 'H', 'D', 'R')
#define DXBC_CHUNK_SHADER_EX        BX_MAKEFOURCC('S', 'H', 'E', 'X')

// FourCC tags of the input and output signature chunks.
#define DXBC_CHUNK_INPUT_SIGNATURE  BX_MAKEFOURCC('I', 'S', 'G', 'N')
#define DXBC_CHUNK_OUTPUT_SIGNATURE BX_MAKEFOURCC('O', 'S', 'G', 'N')
1851 
read(bx::ReaderSeekerI * _reader,DxbcContext & _dxbc,bx::Error * _err)1852 	int32_t read(bx::ReaderSeekerI* _reader, DxbcContext& _dxbc, bx::Error* _err)
1853 	{
1854 		int32_t size = 0;
1855 		size += bx::read(_reader, _dxbc.header, _err);
1856 		_dxbc.shader.shex = false;
1857 		_dxbc.shader.aon9 = false;
1858 
1859 		for (uint32_t ii = 0; ii < _dxbc.header.numChunks; ++ii)
1860 		{
1861 			bx::seek(_reader, sizeof(DxbcContext::Header) + ii*sizeof(uint32_t), bx::Whence::Begin);
1862 
1863 			uint32_t chunkOffset;
1864 			size += bx::read(_reader, chunkOffset, _err);
1865 
1866 			bx::seek(_reader, chunkOffset, bx::Whence::Begin);
1867 
1868 			uint32_t fourcc;
1869 			size += bx::read(_reader, fourcc, _err);
1870 
1871 			uint32_t chunkSize;
1872 			size += bx::read(_reader, chunkSize, _err);
1873 
1874 			switch (fourcc)
1875 			{
1876 			case DXBC_CHUNK_SHADER_EX:
1877 				_dxbc.shader.shex = true;
1878 				BX_FALLTHROUGH;
1879 
1880 			case DXBC_CHUNK_SHADER:
1881 				size += read(_reader, _dxbc.shader, _err);
1882 				break;
1883 
1884 			case BX_MAKEFOURCC('I', 'S', 'G', '1'):
1885 			case DXBC_CHUNK_INPUT_SIGNATURE:
1886 				size += read(_reader, _dxbc.inputSignature, _err);
1887 				break;
1888 
1889 			case BX_MAKEFOURCC('O', 'S', 'G', '1'):
1890 			case BX_MAKEFOURCC('O', 'S', 'G', '5'):
1891 			case DXBC_CHUNK_OUTPUT_SIGNATURE:
1892 				size += read(_reader, _dxbc.outputSignature, _err);
1893 				break;
1894 
1895 			case BX_MAKEFOURCC('A', 'o', 'n', '9'): // Contains DX9BC for feature level 9.x (*s_4_0_level_9_*) shaders.
1896 				_dxbc.shader.aon9 = true;
1897 				break;
1898 
1899 			case BX_MAKEFOURCC('I', 'F', 'C', 'E'): // Interface.
1900 			case BX_MAKEFOURCC('R', 'D', 'E', 'F'): // Resource definition.
1901 			case BX_MAKEFOURCC('S', 'D', 'G', 'B'): // Shader debugging info (old).
1902 			case BX_MAKEFOURCC('S', 'P', 'D', 'B'): // Shader debugging info (new).
1903 			case BX_MAKEFOURCC('S', 'F', 'I', '0'): // ?
1904 			case BX_MAKEFOURCC('S', 'T', 'A', 'T'): // Statistics.
1905 			case BX_MAKEFOURCC('P', 'C', 'S', 'G'): // Patch constant signature.
1906 			case BX_MAKEFOURCC('P', 'S', 'O', '1'): // Pipeline State Object 1
1907 			case BX_MAKEFOURCC('P', 'S', 'O', '2'): // Pipeline State Object 2
1908 			case BX_MAKEFOURCC('X', 'N', 'A', 'P'): // ?
1909 			case BX_MAKEFOURCC('X', 'N', 'A', 'S'): // ?
1910 				size += chunkSize;
1911 				break;
1912 
1913 			default:
1914 				size += chunkSize;
1915 				BX_CHECK(false, "UNKNOWN FOURCC %c%c%c%c %d"
1916 					, ( (char*)&fourcc)[0]
1917 					, ( (char*)&fourcc)[1]
1918 					, ( (char*)&fourcc)[2]
1919 					, ( (char*)&fourcc)[3]
1920 					, size
1921 					);
1922 				break;
1923 			}
1924 		}
1925 
1926 		return size;
1927 	}
1928 
write(bx::WriterSeekerI * _writer,const DxbcContext & _dxbc,bx::Error * _err)1929 	int32_t write(bx::WriterSeekerI* _writer, const DxbcContext& _dxbc, bx::Error* _err)
1930 	{
1931 		int32_t size = 0;
1932 
1933 		int64_t dxbcOffset = bx::seek(_writer);
1934 		size += bx::write(_writer, DXBC_CHUNK_HEADER);
1935 
1936 		size += bx::writeRep(_writer, 0, 16, _err);
1937 
1938 		size += bx::write(_writer, UINT32_C(1), _err);
1939 
1940 		int64_t sizeOffset = bx::seek(_writer);
1941 		size += bx::writeRep(_writer, 0, 4, _err);
1942 
1943 		uint32_t numChunks = 3;
1944 		size += bx::write(_writer, numChunks, _err);
1945 
1946 		int64_t chunksOffsets = bx::seek(_writer);
1947 		size += bx::writeRep(_writer, 0, numChunks*sizeof(uint32_t), _err);
1948 
1949 		uint32_t chunkOffset[3];
1950 		uint32_t chunkSize[3];
1951 
1952 		chunkOffset[0] = uint32_t(bx::seek(_writer) - dxbcOffset);
1953 		size += write(_writer, DXBC_CHUNK_INPUT_SIGNATURE, _err);
1954 		size += write(_writer, UINT32_C(0), _err);
1955 		chunkSize[0] = write(_writer, _dxbc.inputSignature, _err);
1956 
1957 		chunkOffset[1] = uint32_t(bx::seek(_writer) - dxbcOffset);
1958 		size += write(_writer, DXBC_CHUNK_OUTPUT_SIGNATURE, _err);
1959 		size += write(_writer, UINT32_C(0), _err);
1960 		chunkSize[1] = write(_writer, _dxbc.outputSignature, _err);
1961 
1962 		chunkOffset[2] = uint32_t(bx::seek(_writer) - dxbcOffset);
1963 		size += write(_writer, _dxbc.shader.shex ? DXBC_CHUNK_SHADER_EX : DXBC_CHUNK_SHADER, _err);
1964 		size += write(_writer, UINT32_C(0), _err);
1965 		chunkSize[2] = write(_writer, _dxbc.shader, _err);
1966 
1967 		size += 0
1968 			+ chunkSize[0]
1969 			+ chunkSize[1]
1970 			+ chunkSize[2]
1971 			;
1972 
1973 		int64_t eof = bx::seek(_writer);
1974 
1975 		bx::seek(_writer, sizeOffset, bx::Whence::Begin);
1976 		bx::write(_writer, size, _err);
1977 
1978 		bx::seek(_writer, chunksOffsets, bx::Whence::Begin);
1979 		bx::write(_writer, chunkOffset, sizeof(chunkOffset), _err);
1980 
1981 		for (uint32_t ii = 0; ii < BX_COUNTOF(chunkOffset); ++ii)
1982 		{
1983 			bx::seek(_writer, chunkOffset[ii]+4, bx::Whence::Begin);
1984 			bx::write(_writer, chunkSize[ii], _err);
1985 		}
1986 
1987 		bx::seek(_writer, eof, bx::Whence::Begin);
1988 
1989 		return size;
1990 	}
1991 
parse(const DxbcShader & _src,DxbcParseFn _fn,void * _userData,bx::Error * _err)1992 	void parse(const DxbcShader& _src, DxbcParseFn _fn, void* _userData, bx::Error* _err)
1993 	{
1994 		BX_ERROR_SCOPE(_err);
1995 
1996 		bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) );
1997 
1998 		for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;)
1999 		{
2000 			DxbcInstruction instruction;
2001 			uint32_t size = read(&reader, instruction, _err);
2002 			BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); BX_UNUSED(size);
2003 
2004 			bool cont = _fn(token * sizeof(uint32_t), instruction, _userData);
2005 			if (!cont)
2006 			{
2007 				return;
2008 			}
2009 
2010 			token += instruction.length;
2011 		}
2012 	}
2013 
filter(DxbcShader & _dst,const DxbcShader & _src,DxbcFilterFn _fn,void * _userData,bx::Error * _err)2014 	void filter(DxbcShader& _dst, const DxbcShader& _src, DxbcFilterFn _fn, void* _userData, bx::Error* _err)
2015 	{
2016 		BX_ERROR_SCOPE(_err);
2017 
2018 		bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) );
2019 
2020 		bx::MemoryBlock mb(g_allocator);
2021 		bx::MemoryWriter writer(&mb);
2022 
2023 		int32_t total = 0;
2024 
2025 		for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;)
2026 		{
2027 			DxbcInstruction instruction;
2028 			uint32_t size = read(&reader, instruction, _err);
2029 			BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); BX_UNUSED(size);
2030 
2031 			_fn(instruction, _userData);
2032 
2033 			bx::SizerWriter sw;
2034 			uint32_t length = instruction.length;
2035 			instruction.length = uint32_t(write(&sw, instruction, _err)/4);
2036 
2037 			total += write(&writer, instruction, _err);
2038 			token += length;
2039 		}
2040 
2041 		uint8_t* data = (uint8_t*)mb.more();
2042 		_dst.byteCode.resize(total);
2043 		bx::memCopy(_dst.byteCode.data(), data, total);
2044 	}
2045 
2046 } // namespace bgfx
2047