1 use auxil::{FastHashMap, FastHashSet, ShaderStage};
2 use hal::{command, image, pso};
3 
4 use winapi::{
5     shared::minwindef::UINT,
6     shared::{
7         dxgiformat,
8         minwindef::{FALSE, TRUE},
9         winerror,
10     },
11     um::{d3d11, d3dcommon},
12 };
13 
14 use wio::com::ComPtr;
15 
16 use std::{mem, ptr};
17 
18 use parking_lot::Mutex;
19 use smallvec::SmallVec;
20 use spirv_cross::{self, hlsl::ShaderModel};
21 
22 use crate::{conv, shader, Buffer, Image, RenderPassCache};
23 
/// Buffer-to-buffer copy parameters with a C-compatible layout.
///
/// Embedded as the `buffer` member of `BufferImageCopyInfo`, whose size is used as the
/// byte width of the internal constant buffer.
// NOTE(review): presumably mirrors a constant-buffer declaration in `shaders/copy.hlsl`;
// the layout (field order and padding) must not change without checking the shader.
#[repr(C)]
struct BufferCopy {
    // source location (presumably a byte offset, confirm against the copy shader)
    src: u32,
    // destination location (presumably a byte offset, confirm against the copy shader)
    dst: u32,
    // explicit padding, rounds the struct up to 16 bytes
    _padding: [u32; 2],
}
30 
/// Image-to-image copy parameters with a C-compatible layout.
///
/// Embedded as the `image` member of `BufferImageCopyInfo`.
// NOTE(review): presumably mirrors a constant-buffer declaration in `shaders/copy.hlsl`;
// the meaning of the individual components is defined by the shader — confirm there.
#[repr(C)]
struct ImageCopy {
    // source coordinates, four components (16 bytes)
    src: [u32; 4],
    // destination coordinates, four components (16 bytes)
    dst: [u32; 4],
}
36 
/// Buffer<->image copy parameters with a C-compatible layout.
///
/// Embedded as the `buffer_image` member of `BufferImageCopyInfo`. The struct is 64 bytes:
/// the first three fields fill one 16-byte row, followed by three 16-byte vectors.
// NOTE(review): presumably mirrors a constant-buffer declaration in `shaders/copy.hlsl`;
// units of the individual fields are defined by the shader — confirm there.
#[repr(C)]
struct BufferImageCopy {
    // offset into the buffer
    buffer_offset: u32,
    // two-component size of the buffer-side region
    buffer_size: [u32; 2],
    // explicit padding, completes the first 16-byte row
    _padding: u32,
    // offset into the image
    image_offset: [u32; 4],
    // extent of the copied image region
    image_extent: [u32; 4],
    // actual size of the target image
    image_size: [u32; 4],
}
47 
48 #[repr(C)]
49 struct BufferImageCopyInfo {
50     buffer: BufferCopy,
51     image: ImageCopy,
52     buffer_image: BufferImageCopy,
53 }
54 
/// Blit parameters with a C-compatible layout (six consecutive `f32` values, 24 bytes).
// NOTE(review): presumably consumed by the shaders in `shaders/blit.hlsl`; whether the
// values are in texels or normalized coordinates is defined there — confirm.
#[repr(C)]
struct BlitInfo {
    // two-component offset of the blit region
    offset: [f32; 2],
    // two-component extent of the blit region
    extent: [f32; 2],
    // depth coordinate / slice
    z: f32,
    // mip level
    level: f32,
}
62 
/// Clear value for the partial-clear fallback, stored as 16 raw bytes.
#[repr(C)]
struct PartialClearInfo {
    // The clear value is kept as four raw 32-bit words and reinterpreted as needed;
    // transmuting between the types is easier than juggling all different kinds of fields.
    data: [u32; 4],
}
68 
// The thread group dimensions used by our copy shaders.
// NOTE(review): these presumably have to match the `numthreads` declaration in
// `shaders/copy.hlsl` — confirm before changing either side.
const COPY_THREAD_GROUP_X: u32 = 8;
const COPY_THREAD_GROUP_Y: u32 = 8;
72 
73 #[derive(Clone, Debug)]
74 struct ComputeCopyBuffer {
75     d1_from_buffer: Option<ComPtr<d3d11::ID3D11ComputeShader>>,
76     // Buffer -> Image2D
77     d2_from_buffer: Option<ComPtr<d3d11::ID3D11ComputeShader>>,
78     // Image2D -> Buffer
79     d2_into_buffer: ComPtr<d3d11::ID3D11ComputeShader>,
80     scale: (u32, u32),
81 }
82 
83 #[derive(Debug)]
84 struct ConstantBuffer {
85     buffer: ComPtr<d3d11::ID3D11Buffer>,
86 }
87 
88 impl ConstantBuffer {
update<T>(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>, data: T)89     unsafe fn update<T>(&mut self, context: &ComPtr<d3d11::ID3D11DeviceContext>, data: T) {
90         let mut mapped = mem::zeroed::<d3d11::D3D11_MAPPED_SUBRESOURCE>();
91         let hr = context.Map(
92             self.buffer.as_raw() as _,
93             0,
94             d3d11::D3D11_MAP_WRITE_DISCARD,
95             0,
96             &mut mapped,
97         );
98         assert_eq!(winerror::S_OK, hr);
99 
100         ptr::copy(&data, mapped.pData as _, 1);
101 
102         context.Unmap(self.buffer.as_raw() as _, 0);
103     }
104 }
105 
106 #[derive(Debug)]
107 struct MissingComputeInternal {
108     // Image<->Image not covered by `CopySubresourceRegion`
109     cs_copy_image_shaders: FastHashSet<(dxgiformat::DXGI_FORMAT, dxgiformat::DXGI_FORMAT)>,
110     // Image -> Buffer and Buffer -> Image shaders
111     cs_copy_buffer_shaders: FastHashSet<dxgiformat::DXGI_FORMAT>,
112 }
113 
114 #[derive(Debug)]
115 struct ComputeInternal {
116     // Image<->Image not covered by `CopySubresourceRegion`
117     cs_copy_image_shaders: FastHashMap<
118         (dxgiformat::DXGI_FORMAT, dxgiformat::DXGI_FORMAT),
119         ComPtr<d3d11::ID3D11ComputeShader>,
120     >,
121     // Image -> Buffer and Buffer -> Image shaders
122     cs_copy_buffer_shaders: FastHashMap<dxgiformat::DXGI_FORMAT, ComputeCopyBuffer>,
123 }
124 
125 #[derive(Debug)]
126 enum PossibleComputeInternal {
127     Available(ComputeInternal),
128     Missing(MissingComputeInternal),
129 }
130 
131 // Holds everything we need for fallback implementations of features that are not in DX.
132 //
133 // TODO: make struct fields more modular and group them up in structs depending on if it is a
134 //       fallback version or not (eg. Option<PartialClear>), should make struct definition and
135 //       `new` function smaller
136 #[derive(Debug)]
137 pub struct Internal {
138     // partial clearing
139     vs_partial_clear: ComPtr<d3d11::ID3D11VertexShader>,
140     ps_partial_clear_float: ComPtr<d3d11::ID3D11PixelShader>,
141     ps_partial_clear_uint: ComPtr<d3d11::ID3D11PixelShader>,
142     ps_partial_clear_int: ComPtr<d3d11::ID3D11PixelShader>,
143     ps_partial_clear_depth: ComPtr<d3d11::ID3D11PixelShader>,
144     ps_partial_clear_stencil: ComPtr<d3d11::ID3D11PixelShader>,
145     partial_clear_depth_stencil_state: ComPtr<d3d11::ID3D11DepthStencilState>,
146     partial_clear_depth_state: ComPtr<d3d11::ID3D11DepthStencilState>,
147     partial_clear_stencil_state: ComPtr<d3d11::ID3D11DepthStencilState>,
148 
149     // blitting
150     vs_blit_2d: ComPtr<d3d11::ID3D11VertexShader>,
151 
152     sampler_nearest: ComPtr<d3d11::ID3D11SamplerState>,
153     sampler_linear: ComPtr<d3d11::ID3D11SamplerState>,
154 
155     ps_blit_2d_uint: ComPtr<d3d11::ID3D11PixelShader>,
156     ps_blit_2d_int: ComPtr<d3d11::ID3D11PixelShader>,
157     ps_blit_2d_float: ComPtr<d3d11::ID3D11PixelShader>,
158 
159     // all compute shader based workarounds, so they can be None when running on an older version without compute shaders.
160     compute_internal: PossibleComputeInternal,
161 
162     // internal constant buffer that is used by internal shaders
163     internal_buffer: Mutex<ConstantBuffer>,
164 
165     // public buffer that is used as intermediate storage for some operations (memory invalidation)
166     pub working_buffer: ComPtr<d3d11::ID3D11Buffer>,
167     pub working_buffer_size: u64,
168 
169     pub constant_buffer_count_buffer:
170         [UINT; d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT as _],
171 
172     /// Command lists are not supported by graphics card and are being emulated.
173     /// Requires various workarounds to make things work correctly.
174     pub command_list_emulation: bool,
175 
176     pub device_features: hal::Features,
177     pub device_feature_level: u32,
178 
179     pub downlevel: hal::DownlevelProperties,
180 }
181 
compile_blob( src: &[u8], entrypoint: &str, stage: ShaderStage, shader_model: ShaderModel, ) -> ComPtr<d3dcommon::ID3DBlob>182 fn compile_blob(
183     src: &[u8],
184     entrypoint: &str,
185     stage: ShaderStage,
186     shader_model: ShaderModel,
187 ) -> ComPtr<d3dcommon::ID3DBlob> {
188     unsafe {
189         ComPtr::from_raw(shader::compile_hlsl_shader(stage, shader_model, entrypoint, src).unwrap())
190     }
191 }
192 
compile_vs( device: &ComPtr<d3d11::ID3D11Device>, src: &[u8], entrypoint: &str, shader_model: ShaderModel, ) -> ComPtr<d3d11::ID3D11VertexShader>193 fn compile_vs(
194     device: &ComPtr<d3d11::ID3D11Device>,
195     src: &[u8],
196     entrypoint: &str,
197     shader_model: ShaderModel,
198 ) -> ComPtr<d3d11::ID3D11VertexShader> {
199     let bytecode = compile_blob(src, entrypoint, ShaderStage::Vertex, shader_model);
200     let mut shader = ptr::null_mut();
201     let hr = unsafe {
202         device.CreateVertexShader(
203             bytecode.GetBufferPointer(),
204             bytecode.GetBufferSize(),
205             ptr::null_mut(),
206             &mut shader as *mut *mut _ as *mut *mut _,
207         )
208     };
209     assert_eq!(true, winerror::SUCCEEDED(hr));
210 
211     unsafe { ComPtr::from_raw(shader) }
212 }
213 
compile_ps( device: &ComPtr<d3d11::ID3D11Device>, src: &[u8], entrypoint: &str, shader_model: ShaderModel, ) -> ComPtr<d3d11::ID3D11PixelShader>214 fn compile_ps(
215     device: &ComPtr<d3d11::ID3D11Device>,
216     src: &[u8],
217     entrypoint: &str,
218     shader_model: ShaderModel,
219 ) -> ComPtr<d3d11::ID3D11PixelShader> {
220     let bytecode = compile_blob(src, entrypoint, ShaderStage::Fragment, shader_model);
221     let mut shader = ptr::null_mut();
222     let hr = unsafe {
223         device.CreatePixelShader(
224             bytecode.GetBufferPointer(),
225             bytecode.GetBufferSize(),
226             ptr::null_mut(),
227             &mut shader as *mut *mut _ as *mut *mut _,
228         )
229     };
230     assert_eq!(true, winerror::SUCCEEDED(hr));
231 
232     unsafe { ComPtr::from_raw(shader) }
233 }
234 
compile_cs( device: &ComPtr<d3d11::ID3D11Device>, src: &[u8], entrypoint: &str, shader_model: ShaderModel, ) -> ComPtr<d3d11::ID3D11ComputeShader>235 fn compile_cs(
236     device: &ComPtr<d3d11::ID3D11Device>,
237     src: &[u8],
238     entrypoint: &str,
239     shader_model: ShaderModel,
240 ) -> ComPtr<d3d11::ID3D11ComputeShader> {
241     let bytecode = compile_blob(src, entrypoint, ShaderStage::Compute, shader_model);
242     let mut shader = ptr::null_mut();
243     let hr = unsafe {
244         device.CreateComputeShader(
245             bytecode.GetBufferPointer(),
246             bytecode.GetBufferSize(),
247             ptr::null_mut(),
248             &mut shader as *mut *mut _ as *mut *mut _,
249         )
250     };
251     assert_eq!(true, winerror::SUCCEEDED(hr));
252 
253     unsafe { ComPtr::from_raw(shader) }
254 }
255 
256 impl Internal {
new( device: &ComPtr<d3d11::ID3D11Device>, device_features: hal::Features, device_feature_level: u32, downlevel: hal::DownlevelProperties, ) -> Self257     pub fn new(
258         device: &ComPtr<d3d11::ID3D11Device>,
259         device_features: hal::Features,
260         device_feature_level: u32,
261         downlevel: hal::DownlevelProperties,
262     ) -> Self {
263         let internal_buffer = {
264             let desc = d3d11::D3D11_BUFFER_DESC {
265                 ByteWidth: mem::size_of::<BufferImageCopyInfo>() as _,
266                 Usage: d3d11::D3D11_USAGE_DYNAMIC,
267                 BindFlags: d3d11::D3D11_BIND_CONSTANT_BUFFER,
268                 CPUAccessFlags: d3d11::D3D11_CPU_ACCESS_WRITE,
269                 MiscFlags: 0,
270                 StructureByteStride: 0,
271             };
272 
273             let mut buffer = ptr::null_mut();
274             let hr = unsafe {
275                 device.CreateBuffer(
276                     &desc,
277                     ptr::null_mut(),
278                     &mut buffer as *mut *mut _ as *mut *mut _,
279                 )
280             };
281             assert_eq!(true, winerror::SUCCEEDED(hr));
282 
283             unsafe { ComPtr::from_raw(buffer) }
284         };
285 
286         let (depth_stencil_state, depth_state, stencil_state) = {
287             let mut depth_state = ptr::null_mut();
288             let mut stencil_state = ptr::null_mut();
289             let mut depth_stencil_state = ptr::null_mut();
290 
291             let mut desc = d3d11::D3D11_DEPTH_STENCIL_DESC {
292                 DepthEnable: TRUE,
293                 DepthWriteMask: d3d11::D3D11_DEPTH_WRITE_MASK_ALL,
294                 DepthFunc: d3d11::D3D11_COMPARISON_ALWAYS,
295                 StencilEnable: TRUE,
296                 StencilReadMask: 0,
297                 StencilWriteMask: !0,
298                 FrontFace: d3d11::D3D11_DEPTH_STENCILOP_DESC {
299                     StencilFailOp: d3d11::D3D11_STENCIL_OP_REPLACE,
300                     StencilDepthFailOp: d3d11::D3D11_STENCIL_OP_REPLACE,
301                     StencilPassOp: d3d11::D3D11_STENCIL_OP_REPLACE,
302                     StencilFunc: d3d11::D3D11_COMPARISON_ALWAYS,
303                 },
304                 BackFace: d3d11::D3D11_DEPTH_STENCILOP_DESC {
305                     StencilFailOp: d3d11::D3D11_STENCIL_OP_REPLACE,
306                     StencilDepthFailOp: d3d11::D3D11_STENCIL_OP_REPLACE,
307                     StencilPassOp: d3d11::D3D11_STENCIL_OP_REPLACE,
308                     StencilFunc: d3d11::D3D11_COMPARISON_ALWAYS,
309                 },
310             };
311 
312             let hr = unsafe {
313                 device.CreateDepthStencilState(
314                     &desc,
315                     &mut depth_stencil_state as *mut *mut _ as *mut *mut _,
316                 )
317             };
318             assert_eq!(winerror::S_OK, hr);
319 
320             desc.DepthEnable = TRUE;
321             desc.StencilEnable = FALSE;
322 
323             let hr = unsafe {
324                 device
325                     .CreateDepthStencilState(&desc, &mut depth_state as *mut *mut _ as *mut *mut _)
326             };
327             assert_eq!(winerror::S_OK, hr);
328 
329             desc.DepthEnable = FALSE;
330             desc.StencilEnable = TRUE;
331 
332             let hr = unsafe {
333                 device.CreateDepthStencilState(
334                     &desc,
335                     &mut stencil_state as *mut *mut _ as *mut *mut _,
336                 )
337             };
338             assert_eq!(winerror::S_OK, hr);
339 
340             unsafe {
341                 (
342                     ComPtr::from_raw(depth_stencil_state),
343                     ComPtr::from_raw(depth_state),
344                     ComPtr::from_raw(stencil_state),
345                 )
346             }
347         };
348 
349         let (sampler_nearest, sampler_linear) = {
350             let mut desc = d3d11::D3D11_SAMPLER_DESC {
351                 Filter: d3d11::D3D11_FILTER_MIN_MAG_MIP_POINT,
352                 AddressU: d3d11::D3D11_TEXTURE_ADDRESS_CLAMP,
353                 AddressV: d3d11::D3D11_TEXTURE_ADDRESS_CLAMP,
354                 AddressW: d3d11::D3D11_TEXTURE_ADDRESS_CLAMP,
355                 MipLODBias: 0f32,
356                 MaxAnisotropy: 0,
357                 ComparisonFunc: 0,
358                 BorderColor: [0f32; 4],
359                 MinLOD: 0f32,
360                 MaxLOD: d3d11::D3D11_FLOAT32_MAX,
361             };
362 
363             let mut nearest = ptr::null_mut();
364             let mut linear = ptr::null_mut();
365 
366             assert_eq!(winerror::S_OK, unsafe {
367                 device.CreateSamplerState(&desc, &mut nearest as *mut *mut _ as *mut *mut _)
368             });
369 
370             desc.Filter = d3d11::D3D11_FILTER_MIN_MAG_MIP_LINEAR;
371 
372             assert_eq!(winerror::S_OK, unsafe {
373                 device.CreateSamplerState(&desc, &mut linear as *mut *mut _ as *mut *mut _)
374             });
375 
376             unsafe { (ComPtr::from_raw(nearest), ComPtr::from_raw(linear)) }
377         };
378 
379         let (working_buffer, working_buffer_size) = {
380             let working_buffer_size = 1 << 16;
381 
382             let desc = d3d11::D3D11_BUFFER_DESC {
383                 ByteWidth: working_buffer_size,
384                 Usage: d3d11::D3D11_USAGE_STAGING,
385                 BindFlags: 0,
386                 CPUAccessFlags: d3d11::D3D11_CPU_ACCESS_READ | d3d11::D3D11_CPU_ACCESS_WRITE,
387                 MiscFlags: 0,
388                 StructureByteStride: 0,
389             };
390             let mut working_buffer = ptr::null_mut();
391 
392             assert_eq!(winerror::S_OK, unsafe {
393                 device.CreateBuffer(
394                     &desc,
395                     ptr::null_mut(),
396                     &mut working_buffer as *mut *mut _ as *mut *mut _,
397                 )
398             });
399 
400             (
401                 unsafe { ComPtr::from_raw(working_buffer) },
402                 working_buffer_size,
403             )
404         };
405 
406         let compute_shaders = if device_feature_level >= d3dcommon::D3D_FEATURE_LEVEL_11_0 {
407             true
408         } else {
409             // FL10 does support compute shaders but we need typed UAVs, which FL10 compute shaders don't support, so we might as well not have them.
410             false
411         };
412 
413         let shader_model = conv::map_feature_level_to_shader_model(device_feature_level);
414 
415         let compute_internal = if compute_shaders {
416             let copy_shaders = include_bytes!("../shaders/copy.hlsl");
417             let mut cs_copy_image_shaders = FastHashMap::default();
418             cs_copy_image_shaders.insert(
419                 (
420                     dxgiformat::DXGI_FORMAT_R8G8_UINT,
421                     dxgiformat::DXGI_FORMAT_R16_UINT,
422                 ),
423                 compile_cs(
424                     device,
425                     copy_shaders,
426                     "cs_copy_image2d_r8g8_image2d_r16",
427                     shader_model,
428                 ),
429             );
430             cs_copy_image_shaders.insert(
431                 (
432                     dxgiformat::DXGI_FORMAT_R16_UINT,
433                     dxgiformat::DXGI_FORMAT_R8G8_UINT,
434                 ),
435                 compile_cs(
436                     device,
437                     copy_shaders,
438                     "cs_copy_image2d_r16_image2d_r8g8",
439                     shader_model,
440                 ),
441             );
442             cs_copy_image_shaders.insert(
443                 (
444                     dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT,
445                     dxgiformat::DXGI_FORMAT_R32_UINT,
446                 ),
447                 compile_cs(
448                     device,
449                     copy_shaders,
450                     "cs_copy_image2d_r8g8b8a8_image2d_r32",
451                     shader_model,
452                 ),
453             );
454             cs_copy_image_shaders.insert(
455                 (
456                     dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT,
457                     dxgiformat::DXGI_FORMAT_R16G16_UINT,
458                 ),
459                 compile_cs(
460                     device,
461                     copy_shaders,
462                     "cs_copy_image2d_r8g8b8a8_image2d_r16g16",
463                     shader_model,
464                 ),
465             );
466             cs_copy_image_shaders.insert(
467                 (
468                     dxgiformat::DXGI_FORMAT_R16G16_UINT,
469                     dxgiformat::DXGI_FORMAT_R32_UINT,
470                 ),
471                 compile_cs(
472                     device,
473                     copy_shaders,
474                     "cs_copy_image2d_r16g16_image2d_r32",
475                     shader_model,
476                 ),
477             );
478             cs_copy_image_shaders.insert(
479                 (
480                     dxgiformat::DXGI_FORMAT_R16G16_UINT,
481                     dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT,
482                 ),
483                 compile_cs(
484                     device,
485                     copy_shaders,
486                     "cs_copy_image2d_r16g16_image2d_r8g8b8a8",
487                     shader_model,
488                 ),
489             );
490             cs_copy_image_shaders.insert(
491                 (
492                     dxgiformat::DXGI_FORMAT_R32_UINT,
493                     dxgiformat::DXGI_FORMAT_R16G16_UINT,
494                 ),
495                 compile_cs(
496                     device,
497                     copy_shaders,
498                     "cs_copy_image2d_r32_image2d_r16g16",
499                     shader_model,
500                 ),
501             );
502             cs_copy_image_shaders.insert(
503                 (
504                     dxgiformat::DXGI_FORMAT_R32_UINT,
505                     dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT,
506                 ),
507                 compile_cs(
508                     device,
509                     copy_shaders,
510                     "cs_copy_image2d_r32_image2d_r8g8b8a8",
511                     shader_model,
512                 ),
513             );
514 
515             let mut cs_copy_buffer_shaders = FastHashMap::default();
516             cs_copy_buffer_shaders.insert(
517                 dxgiformat::DXGI_FORMAT_R32G32B32A32_UINT,
518                 ComputeCopyBuffer {
519                     d1_from_buffer: None,
520                     d2_from_buffer: Some(compile_cs(
521                         device,
522                         copy_shaders,
523                         "cs_copy_buffer_image2d_r32g32b32a32",
524                         shader_model,
525                     )),
526                     d2_into_buffer: compile_cs(
527                         device,
528                         copy_shaders,
529                         "cs_copy_image2d_r32g32b32a32_buffer",
530                         shader_model,
531                     ),
532                     scale: (1, 1),
533                 },
534             );
535             cs_copy_buffer_shaders.insert(
536                 dxgiformat::DXGI_FORMAT_R32G32_UINT,
537                 ComputeCopyBuffer {
538                     d1_from_buffer: None,
539                     d2_from_buffer: Some(compile_cs(
540                         device,
541                         copy_shaders,
542                         "cs_copy_buffer_image2d_r32g32",
543                         shader_model,
544                     )),
545                     d2_into_buffer: compile_cs(
546                         device,
547                         copy_shaders,
548                         "cs_copy_image2d_r32g32_buffer",
549                         shader_model,
550                     ),
551                     scale: (1, 1),
552                 },
553             );
554             cs_copy_buffer_shaders.insert(
555                 dxgiformat::DXGI_FORMAT_R32_UINT,
556                 ComputeCopyBuffer {
557                     d1_from_buffer: Some(compile_cs(
558                         device,
559                         copy_shaders,
560                         "cs_copy_buffer_image1d_r32",
561                         shader_model,
562                     )),
563                     d2_from_buffer: Some(compile_cs(
564                         device,
565                         copy_shaders,
566                         "cs_copy_buffer_image2d_r32",
567                         shader_model,
568                     )),
569                     d2_into_buffer: compile_cs(
570                         device,
571                         copy_shaders,
572                         "cs_copy_image2d_r32_buffer",
573                         shader_model,
574                     ),
575                     scale: (1, 1),
576                 },
577             );
578             cs_copy_buffer_shaders.insert(
579                 dxgiformat::DXGI_FORMAT_R16G16B16A16_UINT,
580                 ComputeCopyBuffer {
581                     d1_from_buffer: None,
582                     d2_from_buffer: Some(compile_cs(
583                         device,
584                         copy_shaders,
585                         "cs_copy_buffer_image2d_r16g16b16a16",
586                         shader_model,
587                     )),
588                     d2_into_buffer: compile_cs(
589                         device,
590                         copy_shaders,
591                         "cs_copy_image2d_r16g16b16a16_buffer",
592                         shader_model,
593                     ),
594                     scale: (1, 1),
595                 },
596             );
597             cs_copy_buffer_shaders.insert(
598                 dxgiformat::DXGI_FORMAT_R16G16_UINT,
599                 ComputeCopyBuffer {
600                     d1_from_buffer: None,
601                     d2_from_buffer: Some(compile_cs(
602                         device,
603                         copy_shaders,
604                         "cs_copy_buffer_image2d_r16g16",
605                         shader_model,
606                     )),
607                     d2_into_buffer: compile_cs(
608                         device,
609                         copy_shaders,
610                         "cs_copy_image2d_r16g16_buffer",
611                         shader_model,
612                     ),
613                     scale: (1, 1),
614                 },
615             );
616             cs_copy_buffer_shaders.insert(
617                 dxgiformat::DXGI_FORMAT_R16_UINT,
618                 ComputeCopyBuffer {
619                     d1_from_buffer: None,
620                     d2_from_buffer: Some(compile_cs(
621                         device,
622                         copy_shaders,
623                         "cs_copy_buffer_image2d_r16",
624                         shader_model,
625                     )),
626                     d2_into_buffer: compile_cs(
627                         device,
628                         copy_shaders,
629                         "cs_copy_image2d_r16_buffer",
630                         shader_model,
631                     ),
632                     scale: (2, 1),
633                 },
634             );
635             cs_copy_buffer_shaders.insert(
636                 dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM,
637                 ComputeCopyBuffer {
638                     d1_from_buffer: None,
639                     d2_from_buffer: None,
640                     d2_into_buffer: compile_cs(
641                         device,
642                         copy_shaders,
643                         "cs_copy_image2d_b8g8r8a8_buffer",
644                         shader_model,
645                     ),
646                     scale: (1, 1),
647                 },
648             );
649             cs_copy_buffer_shaders.insert(
650                 dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT,
651                 ComputeCopyBuffer {
652                     d1_from_buffer: Some(compile_cs(
653                         device,
654                         copy_shaders,
655                         "cs_copy_buffer_image1d_r8g8b8a8",
656                         shader_model,
657                     )),
658                     d2_from_buffer: Some(compile_cs(
659                         device,
660                         copy_shaders,
661                         "cs_copy_buffer_image2d_r8g8b8a8",
662                         shader_model,
663                     )),
664                     d2_into_buffer: compile_cs(
665                         device,
666                         copy_shaders,
667                         "cs_copy_image2d_r8g8b8a8_buffer",
668                         shader_model,
669                     ),
670                     scale: (1, 1),
671                 },
672             );
673             cs_copy_buffer_shaders.insert(
674                 dxgiformat::DXGI_FORMAT_R8G8_UINT,
675                 ComputeCopyBuffer {
676                     d1_from_buffer: Some(compile_cs(
677                         device,
678                         copy_shaders,
679                         "cs_copy_buffer_image1d_r8g8",
680                         shader_model,
681                     )),
682                     d2_from_buffer: Some(compile_cs(
683                         device,
684                         copy_shaders,
685                         "cs_copy_buffer_image2d_r8g8",
686                         shader_model,
687                     )),
688                     d2_into_buffer: compile_cs(
689                         device,
690                         copy_shaders,
691                         "cs_copy_image2d_r8g8_buffer",
692                         shader_model,
693                     ),
694                     scale: (2, 1),
695                 },
696             );
697             cs_copy_buffer_shaders.insert(
698                 dxgiformat::DXGI_FORMAT_R8_UINT,
699                 ComputeCopyBuffer {
700                     d1_from_buffer: Some(compile_cs(
701                         device,
702                         copy_shaders,
703                         "cs_copy_buffer_image1d_r8",
704                         shader_model,
705                     )),
706                     d2_from_buffer: Some(compile_cs(
707                         device,
708                         copy_shaders,
709                         "cs_copy_buffer_image2d_r8",
710                         shader_model,
711                     )),
712                     d2_into_buffer: compile_cs(
713                         device,
714                         copy_shaders,
715                         "cs_copy_image2d_r8_buffer",
716                         shader_model,
717                     ),
718                     scale: (4, 1),
719                 },
720             );
721 
722             PossibleComputeInternal::Available(ComputeInternal {
723                 cs_copy_buffer_shaders,
724                 cs_copy_image_shaders,
725             })
726         } else {
727             let mut cs_copy_image_shaders = FastHashSet::default();
728             cs_copy_image_shaders.insert((
729                 dxgiformat::DXGI_FORMAT_R8G8_UINT,
730                 dxgiformat::DXGI_FORMAT_R16_UINT,
731             ));
732             cs_copy_image_shaders.insert((
733                 dxgiformat::DXGI_FORMAT_R16_UINT,
734                 dxgiformat::DXGI_FORMAT_R8G8_UINT,
735             ));
736             cs_copy_image_shaders.insert((
737                 dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT,
738                 dxgiformat::DXGI_FORMAT_R32_UINT,
739             ));
740             cs_copy_image_shaders.insert((
741                 dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT,
742                 dxgiformat::DXGI_FORMAT_R16G16_UINT,
743             ));
744             cs_copy_image_shaders.insert((
745                 dxgiformat::DXGI_FORMAT_R16G16_UINT,
746                 dxgiformat::DXGI_FORMAT_R32_UINT,
747             ));
748             cs_copy_image_shaders.insert((
749                 dxgiformat::DXGI_FORMAT_R16G16_UINT,
750                 dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT,
751             ));
752             cs_copy_image_shaders.insert((
753                 dxgiformat::DXGI_FORMAT_R32_UINT,
754                 dxgiformat::DXGI_FORMAT_R16G16_UINT,
755             ));
756             cs_copy_image_shaders.insert((
757                 dxgiformat::DXGI_FORMAT_R32_UINT,
758                 dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT,
759             ));
760 
761             let mut cs_copy_buffer_shaders = FastHashSet::default();
762             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_R32G32B32A32_UINT);
763             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_R32G32_UINT);
764             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_R32_UINT);
765             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_R16G16B16A16_UINT);
766             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_R16G16_UINT);
767             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_R16_UINT);
768             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM);
769             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_R8G8B8A8_UINT);
770             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_R8G8_UINT);
771             cs_copy_buffer_shaders.insert(dxgiformat::DXGI_FORMAT_R8_UINT);
772 
773             PossibleComputeInternal::Missing(MissingComputeInternal {
774                 cs_copy_image_shaders,
775                 cs_copy_buffer_shaders,
776             })
777         };
778 
779         let mut threading_capability: d3d11::D3D11_FEATURE_DATA_THREADING =
780             unsafe { mem::zeroed() };
781         let hr = unsafe {
782             device.CheckFeatureSupport(
783                 d3d11::D3D11_FEATURE_THREADING,
784                 &mut threading_capability as *mut _ as *mut _,
785                 mem::size_of::<d3d11::D3D11_FEATURE_DATA_THREADING>() as _,
786             )
787         };
788         assert_eq!(hr, winerror::S_OK);
789 
790         let command_list_emulation = !(threading_capability.DriverCommandLists >= 1);
791         if command_list_emulation {
792             info!("D3D11 command list emulation is active");
793         }
794 
795         let clear_shaders = include_bytes!("../shaders/clear.hlsl");
796         let blit_shaders = include_bytes!("../shaders/blit.hlsl");
797 
798         Internal {
799             vs_partial_clear: compile_vs(device, clear_shaders, "vs_partial_clear", shader_model),
800             ps_partial_clear_float: compile_ps(
801                 device,
802                 clear_shaders,
803                 "ps_partial_clear_float",
804                 shader_model,
805             ),
806             ps_partial_clear_uint: compile_ps(
807                 device,
808                 clear_shaders,
809                 "ps_partial_clear_uint",
810                 shader_model,
811             ),
812             ps_partial_clear_int: compile_ps(
813                 device,
814                 clear_shaders,
815                 "ps_partial_clear_int",
816                 shader_model,
817             ),
818             ps_partial_clear_depth: compile_ps(
819                 device,
820                 clear_shaders,
821                 "ps_partial_clear_depth",
822                 shader_model,
823             ),
824             ps_partial_clear_stencil: compile_ps(
825                 device,
826                 clear_shaders,
827                 "ps_partial_clear_stencil",
828                 shader_model,
829             ),
830             partial_clear_depth_stencil_state: depth_stencil_state,
831             partial_clear_depth_state: depth_state,
832             partial_clear_stencil_state: stencil_state,
833 
834             vs_blit_2d: compile_vs(device, blit_shaders, "vs_blit_2d", shader_model),
835 
836             sampler_nearest,
837             sampler_linear,
838 
839             ps_blit_2d_uint: compile_ps(device, blit_shaders, "ps_blit_2d_uint", shader_model),
840             ps_blit_2d_int: compile_ps(device, blit_shaders, "ps_blit_2d_int", shader_model),
841             ps_blit_2d_float: compile_ps(device, blit_shaders, "ps_blit_2d_float", shader_model),
842 
843             compute_internal,
844 
845             internal_buffer: Mutex::new(ConstantBuffer {
846                 buffer: internal_buffer,
847             }),
848             working_buffer,
849             working_buffer_size: working_buffer_size as _,
850 
851             constant_buffer_count_buffer: [4096_u32;
852                 d3d11::D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT as _],
853 
854             command_list_emulation,
855 
856             device_features,
857             device_feature_level,
858 
859             downlevel,
860         }
861     }
862 
copy_image_2d<T>( &self, context: &ComPtr<d3d11::ID3D11DeviceContext>, src: &Image, dst: &Image, regions: T, ) where T: Iterator<Item = command::ImageCopy>,863     pub fn copy_image_2d<T>(
864         &self,
865         context: &ComPtr<d3d11::ID3D11DeviceContext>,
866         src: &Image,
867         dst: &Image,
868         regions: T,
869     ) where
870         T: Iterator<Item = command::ImageCopy>,
871     {
872         let key = (
873             src.decomposed_format.copy_srv.unwrap(),
874             dst.decomposed_format.copy_srv.unwrap(),
875         );
876 
877         let compute_shader_copy = if let PossibleComputeInternal::Available(ref compute_internal) =
878             self.compute_internal
879         {
880             compute_internal.cs_copy_image_shaders.get(&key)
881         } else if let PossibleComputeInternal::Missing(ref compute_internal) = self.compute_internal
882         {
883             if compute_internal.cs_copy_image_shaders.contains(&key) {
884                 panic!("Tried to copy between images types ({:?} -> {:?}) that require Compute Shaders under FL9 or FL10", src.format, dst.format);
885             } else {
886                 None
887             }
888         } else {
889             None
890         };
891 
892         if let Some(shader) = compute_shader_copy {
893             // Some formats cant go through default path, since they cant
894             // be cast between formats of different component types (eg.
895             // Rg16 <-> Rgba8)
896 
897             // TODO: subresources
898             let srv = src.internal.copy_srv.clone().unwrap().as_raw();
899             let mut const_buf = self.internal_buffer.lock();
900 
901             unsafe {
902                 context.CSSetShader(shader.as_raw(), ptr::null_mut(), 0);
903                 context.CSSetConstantBuffers(0, 1, &const_buf.buffer.as_raw());
904                 context.CSSetShaderResources(0, 1, [srv].as_ptr());
905 
906                 for info in regions {
907                     let image = ImageCopy {
908                         src: [
909                             info.src_offset.x as _,
910                             info.src_offset.y as _,
911                             info.src_offset.z as _,
912                             0,
913                         ],
914                         dst: [
915                             info.dst_offset.x as _,
916                             info.dst_offset.y as _,
917                             info.dst_offset.z as _,
918                             0,
919                         ],
920                     };
921                     const_buf.update(
922                         context,
923                         BufferImageCopyInfo {
924                             image,
925                             ..mem::zeroed()
926                         },
927                     );
928 
929                     let uav = dst.get_uav(info.dst_subresource.level, 0).unwrap().as_raw();
930                     context.CSSetUnorderedAccessViews(0, 1, [uav].as_ptr(), ptr::null_mut());
931 
932                     context.Dispatch(info.extent.width as u32, info.extent.height as u32, 1);
933                 }
934 
935                 // unbind external resources
936                 context.CSSetShaderResources(0, 1, [ptr::null_mut(); 1].as_ptr());
937                 context.CSSetUnorderedAccessViews(
938                     0,
939                     1,
940                     [ptr::null_mut(); 1].as_ptr(),
941                     ptr::null_mut(),
942                 );
943             }
944         } else {
945             // Default copy path
946             for info in regions {
947                 assert_eq!(
948                     src.decomposed_format.typeless, dst.decomposed_format.typeless,
949                     "DX11 backend cannot copy between underlying image formats: {} to {}.",
950                     src.decomposed_format.typeless, dst.decomposed_format.typeless,
951                 );
952 
953                 // Formats are the same per above assert, only need to do it for one of the formats
954                 let full_copy_only =
955                     src.format.is_depth() || src.format.is_stencil() || src.kind.num_samples() > 1;
956 
957                 let copy_box = if full_copy_only {
958                     let offset_zero = info.src_offset.x == 0
959                         && info.src_offset.y == 0
960                         && info.src_offset.z == 0
961                         && info.dst_offset.x == 0
962                         && info.dst_offset.y == 0
963                         && info.dst_offset.z == 0;
964 
965                     let full_extent = info.extent == src.kind.extent();
966 
967                     if !offset_zero || !full_extent {
968                         warn!("image to image copies of depth-stencil or multisampled textures must copy the whole resource. Ignoring non-zero offset or non-full extent.");
969                     }
970 
971                     None
972                 } else {
973                     Some(d3d11::D3D11_BOX {
974                         left: info.src_offset.x as _,
975                         top: info.src_offset.y as _,
976                         front: info.src_offset.z as _,
977                         right: info.src_offset.x as u32 + info.extent.width as u32,
978                         bottom: info.src_offset.y as u32 + info.extent.height as u32,
979                         back: info.src_offset.z as u32 + info.extent.depth as u32,
980                     })
981                 };
982 
983                 // TODO: layer subresources
984                 unsafe {
985                     context.CopySubresourceRegion(
986                         dst.internal.raw,
987                         src.calc_subresource(info.src_subresource.level as _, 0),
988                         info.dst_offset.x as _,
989                         info.dst_offset.y as _,
990                         info.dst_offset.z as _,
991                         src.internal.raw,
992                         dst.calc_subresource(info.dst_subresource.level as _, 0),
993                         copy_box.map_or_else(ptr::null, |b| &b),
994                     );
995                 }
996             }
997         }
998     }
999 
copy_image_to_buffer<T>( &self, context: &ComPtr<d3d11::ID3D11DeviceContext>, src: &Image, dst: &Buffer, regions: T, ) where T: Iterator<Item = command::BufferImageCopy>,1000     pub fn copy_image_to_buffer<T>(
1001         &self,
1002         context: &ComPtr<d3d11::ID3D11DeviceContext>,
1003         src: &Image,
1004         dst: &Buffer,
1005         regions: T,
1006     ) where
1007         T: Iterator<Item = command::BufferImageCopy>,
1008     {
1009         let _scope = debug_scope!(
1010             context,
1011             "Image (format={:?},kind={:?}) => Buffer",
1012             src.format,
1013             src.kind
1014         );
1015 
1016         let shader = if let PossibleComputeInternal::Available(ref compute_internal) =
1017             self.compute_internal
1018         {
1019             compute_internal
1020                 .cs_copy_buffer_shaders
1021                 .get(&src.decomposed_format.copy_srv.unwrap())
1022                 .unwrap_or_else(|| panic!("The DX11 backend does not currently support copying from an image of format {:?} to a buffer", src.format))
1023                 .clone()
1024         } else {
1025             // TODO(cwfitzgerald): Can we fall back to a inherent D3D11 method when copying to a CPU only buffer.
1026             panic!("Tried to copy from an image to a buffer under FL9 or FL10");
1027         };
1028 
1029         let srv = src.internal.copy_srv.clone().unwrap().as_raw();
1030         let uav = dst.internal.uav.unwrap();
1031         let format_desc = src.format.base_format().0.desc();
1032         let bytes_per_texel = format_desc.bits as u32 / 8;
1033         let mut const_buf = self.internal_buffer.lock();
1034 
1035         unsafe {
1036             context.CSSetShader(shader.d2_into_buffer.as_raw(), ptr::null_mut(), 0);
1037             context.CSSetConstantBuffers(0, 1, &const_buf.buffer.as_raw());
1038 
1039             context.CSSetShaderResources(0, 1, [srv].as_ptr());
1040             context.CSSetUnorderedAccessViews(0, 1, [uav].as_ptr(), ptr::null_mut());
1041 
1042             for info in regions {
1043                 let size = src.kind.extent();
1044                 let buffer_image = BufferImageCopy {
1045                     buffer_offset: info.buffer_offset as _,
1046                     buffer_size: [info.buffer_width, info.buffer_height],
1047                     _padding: 0,
1048                     image_offset: [
1049                         info.image_offset.x as _,
1050                         info.image_offset.y as _,
1051                         (info.image_offset.z + info.image_layers.layers.start as i32) as _,
1052                         0,
1053                     ],
1054                     image_extent: [
1055                         info.image_extent.width,
1056                         info.image_extent.height,
1057                         info.image_extent.depth,
1058                         0,
1059                     ],
1060                     image_size: [size.width, size.height, size.depth, 0],
1061                 };
1062 
1063                 const_buf.update(
1064                     context,
1065                     BufferImageCopyInfo {
1066                         buffer_image,
1067                         ..mem::zeroed()
1068                     },
1069                 );
1070 
1071                 debug_marker!(context, "{:?}", info);
1072 
1073                 context.Dispatch(
1074                     ((info.image_extent.width + (COPY_THREAD_GROUP_X - 1))
1075                         / COPY_THREAD_GROUP_X
1076                         / shader.scale.0)
1077                         .max(1),
1078                     ((info.image_extent.height + (COPY_THREAD_GROUP_X - 1))
1079                         / COPY_THREAD_GROUP_Y
1080                         / shader.scale.1)
1081                         .max(1),
1082                     1,
1083                 );
1084 
1085                 if let Some(disjoint_cb) = dst.internal.disjoint_cb {
1086                     let total_size = info.image_extent.depth
1087                         * (info.buffer_height * info.buffer_width * bytes_per_texel);
1088                     let copy_box = d3d11::D3D11_BOX {
1089                         left: info.buffer_offset as u32,
1090                         top: 0,
1091                         front: 0,
1092                         right: info.buffer_offset as u32 + total_size,
1093                         bottom: 1,
1094                         back: 1,
1095                     };
1096 
1097                     context.CopySubresourceRegion(
1098                         disjoint_cb as _,
1099                         0,
1100                         info.buffer_offset as _,
1101                         0,
1102                         0,
1103                         dst.internal.raw as _,
1104                         0,
1105                         &copy_box,
1106                     );
1107                 }
1108             }
1109 
1110             // unbind external resources
1111             context.CSSetShaderResources(0, 1, [ptr::null_mut(); 1].as_ptr());
1112             context.CSSetUnorderedAccessViews(0, 1, [ptr::null_mut(); 1].as_ptr(), ptr::null_mut());
1113         }
1114     }
1115 
copy_buffer_to_image<T>( &self, context: &ComPtr<d3d11::ID3D11DeviceContext>, src: &Buffer, dst: &Image, regions: T, ) where T: Iterator<Item = command::BufferImageCopy>,1116     pub fn copy_buffer_to_image<T>(
1117         &self,
1118         context: &ComPtr<d3d11::ID3D11DeviceContext>,
1119         src: &Buffer,
1120         dst: &Image,
1121         regions: T,
1122     ) where
1123         T: Iterator<Item = command::BufferImageCopy>,
1124     {
1125         let _scope = debug_scope!(
1126             context,
1127             "Buffer => Image (format={:?},kind={:?})",
1128             dst.format,
1129             dst.kind
1130         );
1131 
1132         let compute_internal = match self.compute_internal {
1133             PossibleComputeInternal::Available(ref available) => Some(available),
1134             PossibleComputeInternal::Missing(_) => None,
1135         };
1136 
1137         // NOTE: we have two separate paths for Buffer -> Image transfers. we need to special case
1138         //       uploads to compressed formats through `UpdateSubresource` since we cannot get a
1139         //       UAV of any compressed format.
1140 
1141         let format_desc = dst.format.base_format().0.desc();
1142         let is_compressed = format_desc.is_compressed();
1143         if format_desc.is_compressed() || compute_internal.is_none() {
1144             // we dont really care about non-4x4 block formats..
1145             if is_compressed {
1146                 assert_eq!(format_desc.dim, (4, 4));
1147                 assert!(
1148                     !src.memory_ptr.is_null(),
1149                     "Only CPU to GPU upload of compressed texture is currently supported"
1150                 );
1151             } else if compute_internal.is_none() {
1152                 assert!(
1153                     !src.memory_ptr.is_null(),
1154                     "Only CPU to GPU upload of textures is supported under FL9 or FL10"
1155                 );
1156             }
1157 
1158             for info in regions {
1159                 let bytes_per_texel = format_desc.bits as u32 / 8;
1160 
1161                 let bounds = d3d11::D3D11_BOX {
1162                     left: info.image_offset.x as _,
1163                     top: info.image_offset.y as _,
1164                     front: info.image_offset.z as _,
1165                     right: info.image_offset.x as u32 + info.image_extent.width,
1166                     bottom: info.image_offset.y as u32 + info.image_extent.height,
1167                     back: info.image_offset.z as u32 + info.image_extent.depth,
1168                 };
1169 
1170                 let row_pitch =
1171                     bytes_per_texel * info.image_extent.width / format_desc.dim.0 as u32;
1172                 let depth_pitch = row_pitch * info.image_extent.height / format_desc.dim.1 as u32;
1173 
1174                 for layer in info.image_layers.layers.clone() {
1175                     let layer_offset = layer - info.image_layers.layers.start;
1176 
1177                     unsafe {
1178                         context.UpdateSubresource(
1179                             dst.internal.raw,
1180                             dst.calc_subresource(info.image_layers.level as _, layer as _),
1181                             &bounds,
1182                             src.memory_ptr.offset(
1183                                 src.bound_range.start as isize
1184                                     + info.buffer_offset as isize
1185                                     + depth_pitch as isize * layer_offset as isize,
1186                             ) as _,
1187                             row_pitch,
1188                             depth_pitch,
1189                         );
1190                     }
1191                 }
1192             }
1193         } else {
1194             let shader = compute_internal
1195                 .unwrap()
1196                 .cs_copy_buffer_shaders
1197                 .get(&dst.decomposed_format.copy_uav.unwrap())
1198                 .unwrap_or_else(|| panic!("The DX11 backend does not currently support copying from a buffer to an image of format {:?}", dst.format))
1199                 .clone();
1200 
1201             let srv = src.internal.srv.unwrap();
1202             let mut const_buf = self.internal_buffer.lock();
1203             let shader_raw = match dst.kind {
1204                 image::Kind::D1(..) => shader.d1_from_buffer.unwrap().as_raw(),
1205                 image::Kind::D2(..) => shader.d2_from_buffer.unwrap().as_raw(),
1206                 image::Kind::D3(..) => panic!("Copies into 3D images are not supported"),
1207             };
1208 
1209             unsafe {
1210                 context.CSSetShader(shader_raw, ptr::null_mut(), 0);
1211                 context.CSSetConstantBuffers(0, 1, &const_buf.buffer.as_raw());
1212                 context.CSSetShaderResources(0, 1, [srv].as_ptr());
1213 
1214                 for info in regions {
1215                     let size = dst.kind.extent();
1216                     let buffer_image = BufferImageCopy {
1217                         buffer_offset: info.buffer_offset as _,
1218                         buffer_size: [info.buffer_width, info.buffer_height],
1219                         _padding: 0,
1220                         image_offset: [
1221                             info.image_offset.x as _,
1222                             info.image_offset.y as _,
1223                             (info.image_offset.z + info.image_layers.layers.start as i32) as _,
1224                             0,
1225                         ],
1226                         image_extent: [
1227                             info.image_extent.width,
1228                             info.image_extent.height,
1229                             info.image_extent.depth,
1230                             0,
1231                         ],
1232                         image_size: [size.width, size.height, size.depth, 0],
1233                     };
1234 
1235                     const_buf.update(
1236                         context,
1237                         BufferImageCopyInfo {
1238                             buffer_image,
1239                             ..mem::zeroed()
1240                         },
1241                     );
1242 
1243                     debug_marker!(context, "{:?}", info);
1244 
1245                     // TODO: multiple layers? do we introduce a stride and do multiple dispatch
1246                     //       calls or handle this in the shader? (use z component in dispatch call
1247                     //
1248                     // NOTE: right now our copy UAV is a 2D array, and we set the layer in the
1249                     //       `update_buffer_image` call above
1250                     let uav = dst
1251                         .get_uav(
1252                             info.image_layers.level,
1253                             0, /*info.image_layers.layers.start*/
1254                         )
1255                         .unwrap()
1256                         .as_raw();
1257                     context.CSSetUnorderedAccessViews(0, 1, [uav].as_ptr(), ptr::null_mut());
1258 
1259                     context.Dispatch(
1260                         ((info.image_extent.width + (COPY_THREAD_GROUP_X - 1))
1261                             / COPY_THREAD_GROUP_X
1262                             / shader.scale.0)
1263                             .max(1),
1264                         ((info.image_extent.height + (COPY_THREAD_GROUP_X - 1))
1265                             / COPY_THREAD_GROUP_Y
1266                             / shader.scale.1)
1267                             .max(1),
1268                         1,
1269                     );
1270                 }
1271 
1272                 // unbind external resources
1273                 context.CSSetShaderResources(0, 1, [ptr::null_mut(); 1].as_ptr());
1274                 context.CSSetUnorderedAccessViews(
1275                     0,
1276                     1,
1277                     [ptr::null_mut(); 1].as_ptr(),
1278                     ptr::null_mut(),
1279                 );
1280             }
1281         }
1282     }
1283 
find_blit_shader(&self, src: &Image) -> Option<*mut d3d11::ID3D11PixelShader>1284     fn find_blit_shader(&self, src: &Image) -> Option<*mut d3d11::ID3D11PixelShader> {
1285         use crate::format::ChannelType as Ct;
1286 
1287         match src.format.base_format().1 {
1288             Ct::Uint => Some(self.ps_blit_2d_uint.as_raw()),
1289             Ct::Sint => Some(self.ps_blit_2d_int.as_raw()),
1290             Ct::Unorm | Ct::Snorm | Ct::Sfloat | Ct::Srgb => Some(self.ps_blit_2d_float.as_raw()),
1291             Ct::Ufloat | Ct::Uscaled | Ct::Sscaled => None,
1292         }
1293     }
1294 
blit_2d_image<T>( &self, context: &ComPtr<d3d11::ID3D11DeviceContext>, src: &Image, dst: &Image, filter: image::Filter, regions: T, ) where T: Iterator<Item = command::ImageBlit>,1295     pub fn blit_2d_image<T>(
1296         &self,
1297         context: &ComPtr<d3d11::ID3D11DeviceContext>,
1298         src: &Image,
1299         dst: &Image,
1300         filter: image::Filter,
1301         regions: T,
1302     ) where
1303         T: Iterator<Item = command::ImageBlit>,
1304     {
1305         use std::cmp;
1306 
1307         let _scope = debug_scope!(
1308             context,
1309             "Blit: Image (format={:?},kind={:?}) => Image (format={:?},kind={:?})",
1310             src.format,
1311             src.kind,
1312             dst.format,
1313             dst.kind
1314         );
1315 
1316         let shader = self.find_blit_shader(src).unwrap();
1317 
1318         let srv = src.internal.srv.clone().unwrap().as_raw();
1319         let mut const_buf = self.internal_buffer.lock();
1320 
1321         unsafe {
1322             context.IASetPrimitiveTopology(d3dcommon::D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
1323             context.VSSetShader(self.vs_blit_2d.as_raw(), ptr::null_mut(), 0);
1324             context.VSSetConstantBuffers(0, 1, [const_buf.buffer.as_raw()].as_ptr());
1325             context.PSSetShader(shader, ptr::null_mut(), 0);
1326             context.PSSetShaderResources(0, 1, [srv].as_ptr());
1327             context.PSSetSamplers(
1328                 0,
1329                 1,
1330                 match filter {
1331                     image::Filter::Nearest => [self.sampler_nearest.as_raw()],
1332                     image::Filter::Linear => [self.sampler_linear.as_raw()],
1333                 }
1334                 .as_ptr(),
1335             );
1336 
1337             for info in regions {
1338                 let blit_info = {
1339                     let (sx, dx) = if info.dst_bounds.start.x > info.dst_bounds.end.x {
1340                         (
1341                             info.src_bounds.end.x,
1342                             info.src_bounds.start.x - info.src_bounds.end.x,
1343                         )
1344                     } else {
1345                         (
1346                             info.src_bounds.start.x,
1347                             info.src_bounds.end.x - info.src_bounds.start.x,
1348                         )
1349                     };
1350                     let (sy, dy) = if info.dst_bounds.start.y > info.dst_bounds.end.y {
1351                         (
1352                             info.src_bounds.end.y,
1353                             info.src_bounds.start.y - info.src_bounds.end.y,
1354                         )
1355                     } else {
1356                         (
1357                             info.src_bounds.start.y,
1358                             info.src_bounds.end.y - info.src_bounds.start.y,
1359                         )
1360                     };
1361                     let image::Extent { width, height, .. } =
1362                         src.kind.level_extent(info.src_subresource.level);
1363                     BlitInfo {
1364                         offset: [sx as f32 / width as f32, sy as f32 / height as f32],
1365                         extent: [dx as f32 / width as f32, dy as f32 / height as f32],
1366                         z: 0f32, // TODO
1367                         level: info.src_subresource.level as _,
1368                     }
1369                 };
1370 
1371                 const_buf.update(context, blit_info);
1372 
1373                 // TODO: more layers
1374                 let rtv = dst
1375                     .get_rtv(
1376                         info.dst_subresource.level,
1377                         info.dst_subresource.layers.start,
1378                     )
1379                     .unwrap()
1380                     .as_raw();
1381 
1382                 context.RSSetViewports(
1383                     1,
1384                     [d3d11::D3D11_VIEWPORT {
1385                         TopLeftX: cmp::min(info.dst_bounds.start.x, info.dst_bounds.end.x) as _,
1386                         TopLeftY: cmp::min(info.dst_bounds.start.y, info.dst_bounds.end.y) as _,
1387                         Width: (info.dst_bounds.end.x - info.dst_bounds.start.x).abs() as _,
1388                         Height: (info.dst_bounds.end.y - info.dst_bounds.start.y).abs() as _,
1389                         MinDepth: 0.0f32,
1390                         MaxDepth: 1.0f32,
1391                     }]
1392                     .as_ptr(),
1393                 );
1394                 context.OMSetRenderTargets(1, [rtv].as_ptr(), ptr::null_mut());
1395                 context.Draw(3, 0);
1396             }
1397 
1398             context.PSSetShaderResources(0, 1, [ptr::null_mut()].as_ptr());
1399             context.OMSetRenderTargets(1, [ptr::null_mut()].as_ptr(), ptr::null_mut());
1400         }
1401     }
1402 
clear_attachments<T, U>( &self, context: &ComPtr<d3d11::ID3D11DeviceContext>, clears: T, rects: U, cache: &RenderPassCache, ) where T: Iterator<Item = command::AttachmentClear>, U: Iterator<Item = pso::ClearRect>,1403     pub fn clear_attachments<T, U>(
1404         &self,
1405         context: &ComPtr<d3d11::ID3D11DeviceContext>,
1406         clears: T,
1407         rects: U,
1408         cache: &RenderPassCache,
1409     ) where
1410         T: Iterator<Item = command::AttachmentClear>,
1411         U: Iterator<Item = pso::ClearRect>,
1412     {
1413         use hal::format::ChannelType as Ct;
1414         let _scope = debug_scope!(context, "ClearAttachments");
1415 
1416         let clear_rects: SmallVec<[pso::ClearRect; 8]> = rects.collect();
1417         let mut const_buf = self.internal_buffer.lock();
1418 
1419         unsafe {
1420             context.IASetPrimitiveTopology(d3dcommon::D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
1421             context.IASetInputLayout(ptr::null_mut());
1422             context.VSSetShader(self.vs_partial_clear.as_raw(), ptr::null_mut(), 0);
1423             context.PSSetConstantBuffers(0, 1, [const_buf.buffer.as_raw()].as_ptr());
1424         }
1425 
1426         let subpass = &cache.render_pass.subpasses[cache.current_subpass as usize];
1427 
1428         for clear in clears {
1429             let _scope = debug_scope!(context, "{:?}", clear);
1430 
1431             match clear {
1432                 command::AttachmentClear::Color { index, value } => {
1433                     unsafe {
1434                         const_buf.update(
1435                             context,
1436                             PartialClearInfo {
1437                                 data: mem::transmute(value),
1438                             },
1439                         )
1440                     };
1441 
1442                     let attachment = {
1443                         let rtv_id = subpass.color_attachments[index];
1444                         &cache.attachments[rtv_id.0].view
1445                     };
1446 
1447                     unsafe {
1448                         context.OMSetRenderTargets(
1449                             1,
1450                             [attachment.rtv_handle.unwrap()].as_ptr(),
1451                             ptr::null_mut(),
1452                         );
1453                     }
1454 
1455                     let shader = match attachment.format.base_format().1 {
1456                         Ct::Uint => self.ps_partial_clear_uint.as_raw(),
1457                         Ct::Sint => self.ps_partial_clear_int.as_raw(),
1458                         _ => self.ps_partial_clear_float.as_raw(),
1459                     };
1460                     unsafe { context.PSSetShader(shader, ptr::null_mut(), 0) };
1461 
1462                     for clear_rect in &clear_rects {
1463                         let viewport = conv::map_viewport(&pso::Viewport {
1464                             rect: clear_rect.rect,
1465                             depth: 0f32..1f32,
1466                         });
1467 
1468                         debug_marker!(context, "{:?}", clear_rect.rect);
1469 
1470                         unsafe {
1471                             context.RSSetViewports(1, [viewport].as_ptr());
1472                             context.Draw(3, 0);
1473                         }
1474                     }
1475                 }
1476                 command::AttachmentClear::DepthStencil { depth, stencil } => {
1477                     unsafe {
1478                         const_buf.update(
1479                             context,
1480                             PartialClearInfo {
1481                                 data: [
1482                                     mem::transmute(depth.unwrap_or(0f32)),
1483                                     stencil.unwrap_or(0),
1484                                     0,
1485                                     0,
1486                                 ],
1487                             },
1488                         )
1489                     };
1490 
1491                     let attachment = {
1492                         let dsv_id = subpass.depth_stencil_attachment.unwrap();
1493                         &cache.attachments[dsv_id.0].view
1494                     };
1495 
1496                     unsafe {
1497                         match (depth, stencil) {
1498                             (Some(_), Some(stencil)) => {
1499                                 context.OMSetDepthStencilState(
1500                                     self.partial_clear_depth_stencil_state.as_raw(),
1501                                     stencil,
1502                                 );
1503                                 context.PSSetShader(
1504                                     self.ps_partial_clear_depth.as_raw(),
1505                                     ptr::null_mut(),
1506                                     0,
1507                                 );
1508                             }
1509 
1510                             (Some(_), None) => {
1511                                 context.OMSetDepthStencilState(
1512                                     self.partial_clear_depth_state.as_raw(),
1513                                     0,
1514                                 );
1515                                 context.PSSetShader(
1516                                     self.ps_partial_clear_depth.as_raw(),
1517                                     ptr::null_mut(),
1518                                     0,
1519                                 );
1520                             }
1521 
1522                             (None, Some(stencil)) => {
1523                                 context.OMSetDepthStencilState(
1524                                     self.partial_clear_stencil_state.as_raw(),
1525                                     stencil,
1526                                 );
1527                                 context.PSSetShader(
1528                                     self.ps_partial_clear_stencil.as_raw(),
1529                                     ptr::null_mut(),
1530                                     0,
1531                                 );
1532                             }
1533                             (None, None) => {}
1534                         }
1535 
1536                         context.OMSetRenderTargets(
1537                             0,
1538                             ptr::null_mut(),
1539                             attachment.dsv_handle.unwrap(),
1540                         );
1541                         context.PSSetShader(
1542                             self.ps_partial_clear_depth.as_raw(),
1543                             ptr::null_mut(),
1544                             0,
1545                         );
1546                     }
1547 
1548                     for clear_rect in &clear_rects {
1549                         let viewport = conv::map_viewport(&pso::Viewport {
1550                             rect: clear_rect.rect,
1551                             depth: 0f32..1f32,
1552                         });
1553 
1554                         unsafe {
1555                             context.RSSetViewports(1, [viewport].as_ptr());
1556                             context.Draw(3, 0);
1557                         }
1558                     }
1559                 }
1560             }
1561         }
1562     }
1563 }
1564