1 #[cfg(feature = "pipeline-cache")]
2 use crate::pipeline_cache;
3 use crate::{
4 command, conversions as conv, internal::Channel, native as n, AsNative, Backend, FastHashMap,
5 OnlineRecording, QueueFamily, ResourceIndex, Shared, VisibilityShared,
6 MAX_BOUND_DESCRIPTOR_SETS, MAX_COLOR_ATTACHMENTS,
7 };
8
9 use arrayvec::ArrayVec;
10 use cocoa_foundation::foundation::NSUInteger;
11 use copyless::VecHelper;
12 use foreign_types::{ForeignType, ForeignTypeRef};
13 use hal::{
14 adapter, buffer, device as d, format, image, memory,
15 memory::Properties,
16 pass,
17 pool::CommandPoolCreateFlags,
18 pso,
19 pso::VertexInputRate,
20 query,
21 queue::{QueueFamilyId, QueueGroup, QueuePriority},
22 };
23 use metal::{
24 CaptureManager, MTLCPUCacheMode, MTLLanguageVersion, MTLPrimitiveTopologyClass,
25 MTLPrimitiveType, MTLResourceOptions, MTLSamplerMipFilter, MTLStorageMode, MTLTextureType,
26 MTLVertexStepFunction, NSRange,
27 };
28 use objc::{
29 rc::autoreleasepool,
30 runtime::{Object, BOOL, NO},
31 };
32 use parking_lot::Mutex;
33
34 use std::collections::BTreeMap;
35 #[cfg(feature = "pipeline-cache")]
36 use std::io::Write;
37 use std::{
38 cmp, iter, mem,
39 ops::Range,
40 ptr,
41 sync::{
42 atomic::{AtomicBool, Ordering},
43 Arc,
44 },
45 thread, time,
46 };
47
// Minimum alignment for vertex input binding strides; surfaced to users via
// `min_vertex_input_binding_stride_alignment` in the device limits.
const STRIDE_GRANULARITY: pso::ElemStride = 4; //TODO: work around?
// Number of shader stages used to scale per-stage descriptor limits into
// per-set limits (see `DescriptorLimits` below).
const SHADER_STAGE_COUNT: u32 = 3;
50
/// Errors that can occur while resolving a shader entry point into a
/// (possibly specialized) `metal::Function`.
#[derive(Clone, Debug)]
enum FunctionError {
    /// The entry point name was not found in the compiled library.
    InvalidEntryPoint,
    /// A function constant marked `required` had no matching specialization value.
    MissingRequiredSpecialization,
    /// Re-fetching the function with constant values applied failed.
    BadSpecialization,
}
57
/// Look up `entry` in `library` and, when the platform supports function
/// specialization, re-create the function with the caller's specialization
/// constants applied.
///
/// Returns the plain function when specialization is unsupported or the
/// shader declares no function constants.
fn get_final_function(
    library: &metal::LibraryRef,
    entry: &str,
    specialization: &pso::Specialization,
    function_specialization: bool,
) -> Result<metal::Function, FunctionError> {
    type MTLFunctionConstant = Object;
    profiling::scope!("get_final_function");

    let mut mtl_function = library.get_function(entry, None).map_err(|e| {
        error!(
            "Function retrieval error {:?}. Known names: {:?}",
            e,
            library.function_names()
        );
        FunctionError::InvalidEntryPoint
    })?;

    if !function_specialization {
        // Platform can't specialize; log if the caller asked for it anyway,
        // but still hand back the unspecialized function.
        if !specialization.data.is_empty() || !specialization.constants.is_empty() {
            error!("platform does not support specialization");
        }
        return Ok(mtl_function);
    }

    let dictionary = mtl_function.function_constants_dictionary();
    let count: NSUInteger = unsafe { msg_send![dictionary, count] };
    if count == 0 {
        // The shader declares no function constants; nothing to specialize.
        return Ok(mtl_function);
    }

    let all_values: *mut Object = unsafe { msg_send![dictionary, allValues] };

    let constants = metal::FunctionConstantValues::new();
    // Walk every constant the function declares and fill it from the
    // user-supplied specialization data, matching by constant id.
    for i in 0..count {
        let object: *mut MTLFunctionConstant = unsafe { msg_send![all_values, objectAtIndex: i] };
        let index: NSUInteger = unsafe { msg_send![object, index] };
        let required: BOOL = unsafe { msg_send![object, required] };
        match specialization
            .constants
            .iter()
            .find(|c| c.id as NSUInteger == index)
        {
            Some(c) => unsafe {
                // Point at this constant's bytes inside the specialization blob.
                // NOTE(review): indexes `data` by `c.range.start`; assumes the
                // range was validated upstream — panics otherwise.
                let ptr = &specialization.data[c.range.start as usize] as *const u8 as *const _;
                let ty: metal::MTLDataType = msg_send![object, type];
                constants.set_constant_value_at_index(ptr, ty, c.id as NSUInteger);
            },
            None if required != NO => {
                //TODO: get name
                error!("Missing required specialization constant id {}", index);
                return Err(FunctionError::MissingRequiredSpecialization);
            }
            None => {}
        }
    }

    // Fetch the entry point again, now with the constant values applied.
    mtl_function = library.get_function(entry, Some(constants)).map_err(|e| {
        error!("Specialized function retrieval error {:?}", e);
        FunctionError::BadSpecialization
    })?;

    Ok(mtl_function)
}
122
impl VisibilityShared {
    /// Check whether every query in `queries` (ids relative to `pool_base`)
    /// has a non-zero availability flag in the shared visibility buffer.
    fn are_available(&self, pool_base: query::Id, queries: &Range<query::Id>) -> bool {
        // SAFETY: reads the availability section of the CPU-visible buffer at
        // `availability_offset`; assumes `pool_base + id` stays within the
        // buffer's allocation for every id — TODO confirm against pool setup.
        unsafe {
            let availability_ptr = ((self.buffer.contents() as *mut u8)
                .offset(self.availability_offset as isize)
                as *mut u32)
                .offset(pool_base as isize);
            queries
                .clone()
                .all(|id| *availability_ptr.offset(id as isize) != 0)
        }
    }
}
136
/// A shader stage compiled into a Metal library + function, together with
/// the metadata needed to wire it into a pipeline.
struct CompiledShader {
    library: metal::Library,
    function: metal::Function,
    // Work-group size for the entry point; all zeros when the entry point
    // was not found in the module's entry-point map.
    wg_size: metal::MTLSize,
    // False when the module reports rasterization disabled.
    rasterizing: bool,
    // Bindings of global variables whose last struct member is a
    // runtime-sized array (collected in `load_shader`).
    sized_bindings: Vec<naga::ResourceBinding>,
}
144
/// Logical Metal device.
#[derive(Debug)]
pub struct Device {
    pub(crate) shared: Arc<Shared>,
    // Internal queue used for memory invalidation copies.
    invalidation_queue: command::QueueInner,
    memory_types: Vec<adapter::MemoryType>,
    // Features the device was opened with; checked at runtime
    // (e.g. SAMPLER_MIP_LOD_BIAS in `make_sampler_descriptor`).
    features: hal::Features,
    pub online_recording: OnlineRecording,
    #[cfg(any(feature = "pipeline-cache", feature = "cross"))]
    spv_options: naga::back::spv::Options,
}
// NOTE(review): Send/Sync are asserted manually; presumably the raw Metal
// handles inside `Shared` are safe to move/share across threads — confirm
// against the backend's threading assumptions.
unsafe impl Send for Device {}
unsafe impl Sync for Device {}
157
bitflags! {
    /// Memory type bits.
    ///
    /// Bit index N corresponds to element N of `PhysicalDevice::memory_types`;
    /// `MemoryTypes::describe` maps an index back to Metal storage/cache modes.
    struct MemoryTypes: u32 {
        // = `DEVICE_LOCAL`
        const PRIVATE = 1<<0;
        // = `CPU_VISIBLE | COHERENT`
        const SHARED = 1<<1;
        // = `DEVICE_LOCAL | CPU_VISIBLE`
        const MANAGED_UPLOAD = 1<<2;
        // = `DEVICE_LOCAL | CPU_VISIBLE | CACHED`
        // Memory range invalidation is implemented to stall the whole pipeline.
        // It's inefficient, therefore we aren't going to expose this type.
        //const MANAGED_DOWNLOAD = 1<<3;
    }
}
173
174 impl MemoryTypes {
describe(index: usize) -> (MTLStorageMode, MTLCPUCacheMode)175 fn describe(index: usize) -> (MTLStorageMode, MTLCPUCacheMode) {
176 match Self::from_bits(1 << index).unwrap() {
177 Self::PRIVATE => (MTLStorageMode::Private, MTLCPUCacheMode::DefaultCache),
178 Self::SHARED => (MTLStorageMode::Shared, MTLCPUCacheMode::DefaultCache),
179 Self::MANAGED_UPLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::WriteCombined),
180 //Self::MANAGED_DOWNLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::DefaultCache),
181 _ => unreachable!(),
182 }
183 }
184 }
185
/// Physical Metal device: shared backend state plus the memory types it
/// exposes (computed once in `PhysicalDevice::new`).
#[derive(Debug)]
pub struct PhysicalDevice {
    pub(crate) shared: Arc<Shared>,
    // Index order matches the `MemoryTypes` bit indices.
    memory_types: Vec<adapter::MemoryType>,
}
// NOTE(review): manual Send/Sync, mirroring `Device` — presumably sound for
// the wrapped Metal handles; confirm against backend threading assumptions.
unsafe impl Send for PhysicalDevice {}
unsafe impl Sync for PhysicalDevice {}
193
194 impl PhysicalDevice {
new(shared: Arc<Shared>) -> Self195 pub(crate) fn new(shared: Arc<Shared>) -> Self {
196 let memory_types = if shared.private_caps.os_is_mac {
197 vec![
198 adapter::MemoryType {
199 // PRIVATE
200 properties: Properties::DEVICE_LOCAL,
201 heap_index: 0,
202 },
203 adapter::MemoryType {
204 // SHARED
205 properties: Properties::CPU_VISIBLE | Properties::COHERENT,
206 heap_index: 1,
207 },
208 adapter::MemoryType {
209 // MANAGED_UPLOAD
210 properties: Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE,
211 heap_index: 1,
212 },
213 // MANAGED_DOWNLOAD (removed)
214 ]
215 } else {
216 vec![
217 adapter::MemoryType {
218 // PRIVATE
219 properties: Properties::DEVICE_LOCAL,
220 heap_index: 0,
221 },
222 adapter::MemoryType {
223 // SHARED
224 properties: Properties::CPU_VISIBLE | Properties::COHERENT,
225 heap_index: 1,
226 },
227 ]
228 };
229 PhysicalDevice {
230 shared: shared.clone(),
231 memory_types,
232 }
233 }
234
235 /// Return true if the specified format-swizzle pair is supported natively.
supports_swizzle(&self, format: format::Format, swizzle: format::Swizzle) -> bool236 pub fn supports_swizzle(&self, format: format::Format, swizzle: format::Swizzle) -> bool {
237 self.shared
238 .private_caps
239 .map_format_with_swizzle(format, swizzle)
240 .is_some()
241 }
242 }
243
impl adapter::PhysicalDevice<Backend> for PhysicalDevice {
    /// Open a logical device, validating the requested features and spinning
    /// up the (single) queue group Metal exposes.
    unsafe fn open(
        &self,
        families: &[(&QueueFamily, &[QueuePriority])],
        requested_features: hal::Features,
    ) -> Result<adapter::Gpu<Backend>, d::CreationError> {
        use hal::queue::QueueFamily as _;

        // TODO: Query supported features by feature set rather than hard coding in the supported
        // features. https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
        if !self.features().contains(requested_features) {
            warn!(
                "Features missing: {:?}",
                requested_features - self.features()
            );
            return Err(d::CreationError::MissingFeature);
        }

        let device = self.shared.device.lock();

        // This backend exposes exactly one queue family with one queue; any
        // other request is a caller bug.
        assert_eq!(families.len(), 1);
        assert_eq!(families[0].1.len(), 1);
        let mut queue_group = QueueGroup::new(families[0].0.id());
        for _ in 0..self.shared.private_caps.exposed_queues {
            queue_group.add_queue(command::Queue::new(self.shared.clone()));
        }

        // SPIR-V writer options, only needed when re-emitting SPIR-V for the
        // pipeline cache or the SPIRV-Cross fallback path.
        #[cfg(any(feature = "pipeline-cache", feature = "cross"))]
        let spv_options = {
            use naga::back::spv;
            let mut flags = spv::WriterFlags::empty();
            flags.set(spv::WriterFlags::DEBUG, cfg!(debug_assertions));
            flags.set(
                spv::WriterFlags::ADJUST_COORDINATE_SPACE,
                !requested_features.contains(hal::Features::NDC_Y_UP),
            );
            spv::Options {
                lang_version: (1, 0),
                flags,
                // doesn't matter since we send it through SPIRV-Cross
                capabilities: None,
            }
        };

        let device = Device {
            shared: self.shared.clone(),
            invalidation_queue: command::QueueInner::new(&*device, Some(1)),
            memory_types: self.memory_types.clone(),
            features: requested_features,
            online_recording: OnlineRecording::default(),
            #[cfg(any(feature = "pipeline-cache", feature = "cross"))]
            spv_options,
        };

        Ok(adapter::Gpu {
            device,
            queue_groups: vec![queue_group],
        })
    }

    /// Per-format capabilities; empty properties when no format is given.
    fn format_properties(&self, format: Option<format::Format>) -> format::Properties {
        match format {
            Some(format) => self.shared.private_caps.map_format_properties(format),
            None => format::Properties {
                linear_tiling: format::ImageFeature::empty(),
                optimal_tiling: format::ImageFeature::empty(),
                buffer_features: format::BufferFeature::empty(),
            },
        }
    }

    /// Capabilities of an image with the given format/dimensionality/usage,
    /// or `None` when such an image cannot be created on this device.
    fn image_format_properties(
        &self,
        format: format::Format,
        dimensions: u8,
        tiling: image::Tiling,
        usage: image::Usage,
        view_caps: image::ViewCapabilities,
    ) -> Option<image::FormatProperties> {
        if let image::Tiling::Linear = tiling {
            // Linear images are restricted to plain 2D color transfer targets.
            let format_desc = format.surface_desc();
            let host_usage = image::Usage::TRANSFER_SRC | image::Usage::TRANSFER_DST;
            if dimensions != 2
                || !view_caps.is_empty()
                || !host_usage.contains(usage)
                || format_desc.aspects != format::Aspects::COLOR
                || format_desc.is_compressed()
            {
                return None;
            }
        }
        if dimensions == 1
            && usage
                .intersects(image::Usage::COLOR_ATTACHMENT | image::Usage::DEPTH_STENCIL_ATTACHMENT)
        {
            // MTLRenderPassDescriptor texture must not be MTLTextureType1D
            return None;
        }
        if dimensions == 3 && view_caps.contains(image::ViewCapabilities::KIND_2D_ARRAY) {
            // Can't create 2D/2DArray views of 3D textures
            return None;
        }
        let max_dimension = if dimensions == 3 {
            self.shared.private_caps.max_texture_3d_size as _
        } else {
            self.shared.private_caps.max_texture_size as _
        };

        let max_extent = image::Extent {
            width: max_dimension,
            height: if dimensions >= 2 { max_dimension } else { 1 },
            depth: if dimensions >= 3 { max_dimension } else { 1 },
        };

        self.shared
            .private_caps
            .map_format(format)
            .map(|_| image::FormatProperties {
                max_extent,
                max_levels: if dimensions == 1 { 1 } else { 12 },
                // 3D images enforce a single layer
                max_layers: if dimensions == 3 {
                    1
                } else {
                    self.shared.private_caps.max_texture_layers as _
                },
                sample_count_mask: self.shared.private_caps.sample_count_mask as _,
                //TODO: buffers and textures have separate limits
                // Max buffer size is determined by feature set
                // Max texture size does not appear to be documented publicly
                max_resource_size: self.shared.private_caps.max_buffer_size as _,
            })
    }

    /// Two heaps: device-local (unbounded — TODO) and CPU-visible (bounded by
    /// the max buffer size), plus the pre-computed memory types.
    fn memory_properties(&self) -> adapter::MemoryProperties {
        adapter::MemoryProperties {
            memory_heaps: vec![
                adapter::MemoryHeap {
                    size: !0, //TODO: private memory limits
                    flags: memory::HeapFlags::DEVICE_LOCAL,
                },
                adapter::MemoryHeap {
                    size: self.shared.private_caps.max_buffer_size,
                    flags: memory::HeapFlags::empty(),
                },
            ],
            memory_types: self.memory_types.to_vec(),
        }
    }

    /// Features supported by this device: a hard-coded base set plus
    /// capability-dependent extras queried from `private_caps`.
    fn features(&self) -> hal::Features {
        use hal::Features as F;
        let mut features = F::FULL_DRAW_INDEX_U32
            | F::INDEPENDENT_BLENDING
            | F::DRAW_INDIRECT_FIRST_INSTANCE
            | F::DEPTH_CLAMP
            | F::SAMPLER_ANISOTROPY
            | F::FORMAT_BC
            | F::PRECISE_OCCLUSION_QUERY
            | F::SHADER_STORAGE_BUFFER_ARRAY_DYNAMIC_INDEXING
            | F::VERTEX_STORES_AND_ATOMICS
            | F::FRAGMENT_STORES_AND_ATOMICS
            | F::INSTANCE_RATE
            | F::SEPARATE_STENCIL_REF_VALUES
            | F::SHADER_CLIP_DISTANCE
            | F::MUTABLE_UNNORMALIZED_SAMPLER
            | F::NDC_Y_UP;

        features.set(
            F::IMAGE_CUBE_ARRAY,
            self.shared.private_caps.texture_cube_array,
        );
        features.set(
            F::DUAL_SRC_BLENDING,
            self.shared.private_caps.dual_source_blending,
        );
        features.set(
            F::NON_FILL_POLYGON_MODE,
            self.shared.private_caps.expose_line_mode,
        );
        // Descriptor indexing requires MSL 2.0+.
        if self.shared.private_caps.msl_version >= MTLLanguageVersion::V2_0 {
            features |= F::TEXTURE_DESCRIPTOR_ARRAY
                | F::SHADER_SAMPLED_IMAGE_ARRAY_DYNAMIC_INDEXING
                | F::SAMPLED_TEXTURE_DESCRIPTOR_INDEXING
                | F::STORAGE_TEXTURE_DESCRIPTOR_INDEXING;
        }
        features.set(
            F::SAMPLER_BORDER_COLOR,
            self.shared.private_caps.sampler_clamp_to_border,
        );
        features.set(
            F::MUTABLE_COMPARISON_SAMPLER,
            self.shared.private_caps.mutable_comparison_samplers,
        );

        //TODO: F::DEPTH_BOUNDS
        //TODO: F::SAMPLER_MIRROR_CLAMP_EDGE
        features
    }

    /// Device limits and properties, mostly derived from `private_caps` with
    /// a number of hard-coded values still to be refined (see TODOs).
    fn properties(&self) -> hal::PhysicalDeviceProperties {
        let pc = &self.shared.private_caps;
        let device = self.shared.device.lock();

        let mut caveats = hal::PerformanceCaveats::empty();
        if !self.shared.private_caps.base_vertex_instance_drawing {
            caveats |= hal::PerformanceCaveats::BASE_VERTEX_INSTANCE_DRAWING;
        }
        hal::PhysicalDeviceProperties {
            limits: hal::Limits {
                max_image_1d_size: pc.max_texture_size as _,
                max_image_2d_size: pc.max_texture_size as _,
                max_image_3d_size: pc.max_texture_3d_size as _,
                max_image_cube_size: pc.max_texture_size as _,
                max_image_array_layers: pc.max_texture_layers as _,
                max_texel_elements: (pc.max_texture_size * pc.max_texture_size) as usize,
                max_uniform_buffer_range: pc.max_buffer_size,
                max_storage_buffer_range: pc.max_buffer_size,
                // "Maximum length of an inlined constant data buffer, per graphics or compute function"
                max_push_constants_size: 0x1000,
                max_sampler_allocation_count: !0,
                max_bound_descriptor_sets: MAX_BOUND_DESCRIPTOR_SETS as _,
                descriptor_limits: hal::DescriptorLimits {
                    max_per_stage_descriptor_samplers: pc.max_samplers_per_stage,
                    max_per_stage_descriptor_uniform_buffers: pc.max_buffers_per_stage,
                    max_per_stage_descriptor_storage_buffers: pc.max_buffers_per_stage,
                    // Sampled images need both a texture and a sampler slot.
                    max_per_stage_descriptor_sampled_images: pc
                        .max_textures_per_stage
                        .min(pc.max_samplers_per_stage)
                        as u32,
                    max_per_stage_descriptor_storage_images: pc.max_textures_per_stage,
                    max_per_stage_descriptor_input_attachments: pc.max_textures_per_stage, //TODO
                    max_per_stage_resources: 0x100, //TODO
                    max_descriptor_set_samplers: pc.max_samplers_per_stage * SHADER_STAGE_COUNT,
                    max_descriptor_set_uniform_buffers: pc.max_buffers_per_stage
                        * SHADER_STAGE_COUNT,
                    max_descriptor_set_uniform_buffers_dynamic: 8 * SHADER_STAGE_COUNT,
                    max_descriptor_set_storage_buffers: pc.max_buffers_per_stage
                        * SHADER_STAGE_COUNT,
                    max_descriptor_set_storage_buffers_dynamic: 4 * SHADER_STAGE_COUNT,
                    max_descriptor_set_sampled_images: pc
                        .max_textures_per_stage
                        .min(pc.max_samplers_per_stage)
                        * SHADER_STAGE_COUNT,
                    max_descriptor_set_storage_images: pc.max_textures_per_stage
                        * SHADER_STAGE_COUNT,
                    max_descriptor_set_input_attachments: pc.max_textures_per_stage
                        * SHADER_STAGE_COUNT,
                },
                max_fragment_input_components: pc.max_fragment_input_components as usize,
                max_framebuffer_layers: 2048, // TODO: Determine if this is the correct value
                max_memory_allocation_count: 4096, // TODO: Determine if this is the correct value

                max_patch_size: 0, // No tessellation

                // Note: The maximum number of supported viewports and scissor rectangles varies by device.
                // TODO: read from Metal Feature Sets.
                max_viewports: 1,
                max_viewport_dimensions: [pc.max_texture_size as _; 2],
                max_framebuffer_extent: hal::image::Extent {
                    //TODO
                    width: pc.max_texture_size as _,
                    height: pc.max_texture_size as _,
                    depth: pc.max_texture_layers as _,
                },
                min_memory_map_alignment: 4,

                optimal_buffer_copy_offset_alignment: pc.buffer_alignment,
                optimal_buffer_copy_pitch_alignment: 4,
                min_texel_buffer_offset_alignment: pc.buffer_alignment,
                min_uniform_buffer_offset_alignment: pc.buffer_alignment,
                min_storage_buffer_offset_alignment: pc.buffer_alignment,

                max_compute_work_group_count: [!0; 3], // really undefined
                max_compute_work_group_size: {
                    let size = device.max_threads_per_threadgroup();
                    [size.width as u32, size.height as u32, size.depth as u32]
                },
                max_compute_shared_memory_size: pc.max_total_threadgroup_memory as usize,

                max_vertex_input_attributes: 31,
                max_vertex_input_bindings: 31,
                max_vertex_input_attribute_offset: 255, // TODO
                max_vertex_input_binding_stride: 256,   // TODO
                max_vertex_output_components: pc.max_fragment_input_components as usize,

                framebuffer_color_sample_counts: 0b101, // TODO
                framebuffer_depth_sample_counts: 0b101, // TODO
                framebuffer_stencil_sample_counts: 0b101, // TODO
                max_color_attachments: pc.max_color_render_targets as usize,

                buffer_image_granularity: 1,
                // Note: we issue Metal buffer-to-buffer copies on memory flush/invalidate,
                // and those need to operate on sizes being multiples of 4.
                non_coherent_atom_size: 4,
                max_sampler_anisotropy: 16.,
                min_vertex_input_binding_stride_alignment: STRIDE_GRANULARITY as u64,

                ..hal::Limits::default() // TODO!
            },
            downlevel: hal::DownlevelProperties::all_enabled(),
            performance_caveats: caveats,
            dynamic_pipeline_states: hal::DynamicStates::all(),

            ..hal::PhysicalDeviceProperties::default()
        }
    }
}
552
/// A Metal shading-language version, split into major/minor components.
pub struct LanguageVersion {
    pub major: u8,
    pub minor: u8,
}
557
558 impl LanguageVersion {
new(major: u8, minor: u8) -> Self559 pub fn new(major: u8, minor: u8) -> Self {
560 LanguageVersion { major, minor }
561 }
562 }
563
impl Device {
    /// True when the given heap is backed by coherent (shared) memory.
    fn _is_heap_coherent(&self, heap: &n::MemoryHeap) -> bool {
        match *heap {
            n::MemoryHeap::Private => false,
            n::MemoryHeap::Public(memory_type, _) => self.memory_types[memory_type.0]
                .properties
                .contains(Properties::COHERENT),
            n::MemoryHeap::Native(ref heap) => heap.storage_mode() == MTLStorageMode::Shared,
        }
    }

    /// Compile SPIR-V to a Metal library via SPIRV-Cross: parse, specialize,
    /// translate to MSL, then compile the MSL source with the device.
    #[cfg(feature = "cross")]
    fn compile_shader_library_cross(
        device: &Mutex<metal::Device>,
        raw_data: &[u32],
        compiler_options: &spirv_cross::msl::CompilerOptions,
        msl_version: MTLLanguageVersion,
        specialization: &pso::Specialization,
        stage: naga::ShaderStage,
    ) -> Result<n::ModuleInfo, String> {
        use spirv_cross::ErrorCode as Ec;
        profiling::scope!("compile_shader_library_cross");

        // now parse again using the new overrides
        let mut ast = {
            profiling::scope!("spvc::parse");
            let module = spirv_cross::spirv::Module::from_words(raw_data);
            spirv_cross::spirv::Ast::<spirv_cross::msl::Target>::parse(&module).map_err(|err| {
                match err {
                    Ec::CompilationError(msg) => msg,
                    Ec::Unhandled => "Unexpected parse error".into(),
                }
            })?
        };

        // Apply the user's specialization constants before compilation.
        auxil::spirv_cross_specialize_ast(&mut ast, specialization)?;

        ast.set_compiler_options(compiler_options)
            .map_err(|err| match err {
                Ec::CompilationError(msg) => msg,
                Ec::Unhandled => "Unexpected error".into(),
            })?;

        let entry_points = ast.get_entry_points().map_err(|err| match err {
            Ec::CompilationError(msg) => msg,
            Ec::Unhandled => "Unexpected entry point error".into(),
        })?;

        let shader_code = {
            profiling::scope!("spvc::compile");
            ast.compile().map_err(|err| match err {
                Ec::CompilationError(msg) => msg,
                Ec::Unhandled => "Unknown compile error".into(),
            })?
        };

        // Map original entry point names to the names SPIRV-Cross emitted,
        // keeping the work-group sizes alongside.
        let mut entry_point_map = n::EntryPointMap::default();
        for entry_point in entry_points {
            info!("Entry point {:?}", entry_point);
            let cleansed = ast
                .get_cleansed_entry_point_name(&entry_point.name, entry_point.execution_model)
                .map_err(|err| match err {
                    Ec::CompilationError(msg) => msg,
                    Ec::Unhandled => "Unknown compile error".into(),
                })?;
            entry_point_map.insert(
                (stage, entry_point.name),
                n::EntryPoint {
                    //TODO: should we try to do better?
                    internal_name: Ok(cleansed),
                    work_group_size: [
                        entry_point.work_group_size.x,
                        entry_point.work_group_size.y,
                        entry_point.work_group_size.z,
                    ],
                },
            );
        }

        let rasterization_enabled = ast
            .is_rasterization_enabled()
            .map_err(|_| "Unknown compile error".to_string())?;

        // done
        debug!("SPIRV-Cross generated shader:\n{}", shader_code);
        let options = metal::CompileOptions::new();
        options.set_language_version(msl_version);

        let library = {
            profiling::scope!("Metal::new_library_with_source");
            device
                .lock()
                .new_library_with_source(shader_code.as_ref(), &options)
                .map_err(|err| err.to_string())?
        };

        Ok(n::ModuleInfo {
            library,
            entry_point_map,
            rasterization_enabled,
        })
    }

    /// Compile a naga module to a Metal library: emit MSL (optionally cached
    /// by SPIR-V hash when the pipeline cache is enabled), then compile the
    /// MSL source with the device.
    fn compile_shader_library_naga(
        device: &Mutex<metal::Device>,
        shader: &d::NagaShader,
        naga_options: &naga::back::msl::Options,
        pipeline_options: &naga::back::msl::PipelineOptions,
        #[cfg(feature = "pipeline-cache")] spv_hash: u64,
        #[cfg(feature = "pipeline-cache")] spv_to_msl_cache: Option<&pipeline_cache::SpvToMsl>,
    ) -> Result<n::ModuleInfo, String> {
        profiling::scope!("compile_shader_library_naga");

        // Emit MSL source + entry-point map from the naga module; this is the
        // unit of work the SPV->MSL cache memoizes.
        let get_module_info = || {
            profiling::scope!("naga::msl::write_string");

            let (source, info) = match naga::back::msl::write_string(
                &shader.module,
                &shader.info,
                naga_options,
                pipeline_options,
            ) {
                Ok(pair) => pair,
                Err(e) => {
                    warn!("Naga: {:?}", e);
                    return Err(format!("MSL: {:?}", e));
                }
            };

            let mut entry_point_map = n::EntryPointMap::default();
            for (ep, internal_name) in shader
                .module
                .entry_points
                .iter()
                .zip(info.entry_point_names)
            {
                entry_point_map.insert(
                    (ep.stage, ep.name.clone()),
                    n::EntryPoint {
                        internal_name,
                        work_group_size: ep.workgroup_size,
                    },
                );
            }

            debug!("Naga generated shader:\n{}", source);

            Ok(n::SerializableModuleInfo {
                source,
                entry_point_map,
                rasterization_enabled: true, //TODO
            })
        };

        // Cache lookup keyed by (options, pipeline options, SPIR-V hash).
        // NOTE(review): a cache miss unwraps `get_module_info`, turning an MSL
        // emission error into a panic on this path.
        #[cfg(feature = "pipeline-cache")]
        let module_info = if let Some(spv_to_msl_cache) = spv_to_msl_cache {
            let key = pipeline_cache::SpvToMslKey {
                options: naga_options.clone(),
                pipeline_options: pipeline_options.clone(),
                spv_hash,
            };

            spv_to_msl_cache
                .get_or_create_with(&key, || get_module_info().unwrap())
                .clone()
        } else {
            get_module_info()?
        };

        #[cfg(not(feature = "pipeline-cache"))]
        let module_info = get_module_info()?;

        let options = metal::CompileOptions::new();
        // Translate the requested MSL language version into the Metal enum.
        let msl_version = match naga_options.lang_version {
            (1, 0) => MTLLanguageVersion::V1_0,
            (1, 1) => MTLLanguageVersion::V1_1,
            (1, 2) => MTLLanguageVersion::V1_2,
            (2, 0) => MTLLanguageVersion::V2_0,
            (2, 1) => MTLLanguageVersion::V2_1,
            (2, 2) => MTLLanguageVersion::V2_2,
            (2, 3) => MTLLanguageVersion::V2_3,
            other => panic!("Unexpected language version {:?}", other),
        };
        options.set_language_version(msl_version);

        let library = {
            profiling::scope!("Metal::new_library_with_source");
            device
                .lock()
                .new_library_with_source(module_info.source.as_ref(), &options)
                .map_err(|err| {
                    warn!("Naga generated shader:\n{}", module_info.source);
                    warn!("Failed to compile: {}", err);
                    format!("{:?}", err)
                })?
        };

        Ok(n::ModuleInfo {
            library,
            entry_point_map: module_info.entry_point_map,
            rasterization_enabled: module_info.rasterization_enabled,
        })
    }

    /// Resolve a HAL entry point into a `CompiledShader`: compile via naga
    /// (falling back to SPIRV-Cross when the "cross" feature is enabled and
    /// naga fails), collect runtime-sized-array bindings, and look up the
    /// final specialized Metal function.
    #[cfg_attr(not(feature = "pipeline-cache"), allow(unused_variables))]
    fn load_shader(
        &self,
        ep: &pso::EntryPoint<Backend>,
        layout: &n::PipelineLayout,
        primitive_class: MTLPrimitiveTopologyClass,
        pipeline_cache: Option<&n::PipelineCache>,
        stage: naga::ShaderStage,
    ) -> Result<CompiledShader, pso::CreationError> {
        let _profiling_tag = match stage {
            naga::ShaderStage::Vertex => "vertex",
            naga::ShaderStage::Fragment => "fragment",
            naga::ShaderStage::Compute => "compute",
        };
        profiling::scope!("load_shader", _profiling_tag);

        let device = &self.shared.device;

        // SPIRV-Cross options are per-entry-point, so clone and adjust.
        #[cfg(feature = "cross")]
        let mut compiler_options = layout.spirv_cross_options.clone();
        #[cfg(feature = "cross")]
        {
            compiler_options.entry_point =
                Some((ep.entry.to_string(), conv::map_naga_stage_to_cross(stage)));
            compiler_options.enable_point_size_builtin =
                primitive_class == MTLPrimitiveTopologyClass::Point;
        }
        let pipeline_options = naga::back::msl::PipelineOptions {
            allow_point_size: match primitive_class {
                MTLPrimitiveTopologyClass::Point => true,
                _ => false,
            },
        };

        let info = {
            // Primary path: naga. The "cross" feature adds a fallback below.
            #[cfg_attr(not(feature = "cross"), allow(unused_mut))]
            let mut result = match ep.module.naga {
                Ok(ref shader) => Self::compile_shader_library_naga(
                    device,
                    shader,
                    &layout.naga_options,
                    &pipeline_options,
                    #[cfg(feature = "pipeline-cache")]
                    ep.module.spv_hash,
                    #[cfg(feature = "pipeline-cache")]
                    pipeline_cache.as_ref().map(|cache| &cache.spv_to_msl),
                ),
                Err(ref e) => Err(e.clone()),
            };

            #[cfg(feature = "cross")]
            if result.is_err() {
                result = Self::compile_shader_library_cross(
                    device,
                    &ep.module.spv,
                    &compiler_options,
                    self.shared.private_caps.msl_version,
                    &ep.specialization,
                    stage,
                );
            }
            result.map_err(|e| {
                let error = format!("Error compiling the shader {:?}", e);
                pso::CreationError::ShaderCreationError(stage.into(), error)
            })?
        };

        // collect sizes indices: globals whose struct type ends in a
        // runtime-sized array need their buffer sizes passed at bind time.
        let mut sized_bindings = Vec::new();
        if let Ok(ref shader) = ep.module.naga {
            for (_handle, var) in shader.module.global_variables.iter() {
                if let naga::TypeInner::Struct { ref members, .. } =
                    shader.module.types[var.ty].inner
                {
                    if let Some(member) = members.last() {
                        if let naga::TypeInner::Array {
                            size: naga::ArraySize::Dynamic,
                            ..
                        } = shader.module.types[member.ty].inner
                        {
                            // Note: unwraps are fine, since the MSL is already generated
                            let br = var.binding.clone().unwrap();
                            sized_bindings.push(br);
                        }
                    }
                }
            }
        }

        let lib = info.library.clone();
        let entry_key = (stage, ep.entry.to_string());
        //TODO: avoid heap-allocating the string?
        let (name, wg_size) = match info.entry_point_map.get(&entry_key) {
            Some(p) => (
                match p.internal_name {
                    Ok(ref name) => name.as_str(),
                    Err(ref e) => {
                        return Err(pso::CreationError::ShaderCreationError(
                            stage.into(),
                            format!("{}", e),
                        ))
                    }
                },
                metal::MTLSize {
                    width: p.work_group_size[0] as _,
                    height: p.work_group_size[1] as _,
                    depth: p.work_group_size[2] as _,
                },
            ),
            // this can only happen if the shader came directly from the user
            None => (
                ep.entry,
                metal::MTLSize {
                    width: 0,
                    height: 0,
                    depth: 0,
                },
            ),
        };
        let mtl_function = get_final_function(
            &lib,
            name,
            &ep.specialization,
            self.shared.private_caps.function_specialization,
        )
        .map_err(|e| {
            let error = format!("Invalid shader entry point '{}': {:?}", name, e);
            pso::CreationError::ShaderCreationError(stage.into(), error)
        })?;

        Ok(CompiledShader {
            library: lib,
            function: mtl_function,
            wg_size,
            rasterizing: info.rasterization_enabled,
            sized_bindings,
        })
    }

    /// Translate a HAL sampler description into a Metal sampler descriptor,
    /// returning `None` when the description needs unsupported capabilities
    /// (currently: comparison samplers without device support).
    fn make_sampler_descriptor(
        &self,
        info: &image::SamplerDesc,
    ) -> Option<metal::SamplerDescriptor> {
        let caps = &self.shared.private_caps;
        let descriptor = metal::SamplerDescriptor::new();

        descriptor.set_normalized_coordinates(info.normalized);

        descriptor.set_min_filter(conv::map_filter(info.min_filter));
        descriptor.set_mag_filter(conv::map_filter(info.mag_filter));
        descriptor.set_mip_filter(match info.mip_filter {
            // Note: this shouldn't be required, but Metal appears to be confused when mipmaps
            // are provided even with trivial LOD bias.
            image::Filter::Nearest if info.lod_range.end.0 < 0.5 => {
                MTLSamplerMipFilter::NotMipmapped
            }
            image::Filter::Nearest => MTLSamplerMipFilter::Nearest,
            image::Filter::Linear => MTLSamplerMipFilter::Linear,
        });

        if let Some(aniso) = info.anisotropy_clamp {
            descriptor.set_max_anisotropy(aniso as _);
        }

        let (s, t, r) = info.wrap_mode;
        descriptor.set_address_mode_s(conv::map_wrap_mode(s));
        descriptor.set_address_mode_t(conv::map_wrap_mode(t));
        descriptor.set_address_mode_r(conv::map_wrap_mode(r));

        // LOD bias is gated on the feature the device was opened with.
        let lod_bias = info.lod_bias.0;
        if lod_bias != 0.0 {
            if self.features.contains(hal::Features::SAMPLER_MIP_LOD_BIAS) {
                unsafe {
                    descriptor.set_lod_bias(lod_bias);
                }
            } else {
                error!("Lod bias {:?} is not supported", info.lod_bias);
            }
        }
        descriptor.set_lod_min_clamp(info.lod_range.start.0);
        descriptor.set_lod_max_clamp(info.lod_range.end.0);

        // TODO: Clarify minimum macOS version with Apple (43707452)
        if (caps.os_is_mac && caps.has_version_at_least(10, 13))
            || (!caps.os_is_mac && caps.has_version_at_least(9, 0))
        {
            descriptor.set_lod_average(true); // optimization
        }

        if let Some(fun) = info.comparison {
            if !caps.mutable_comparison_samplers {
                return None;
            }
            descriptor.set_compare_function(conv::map_compare_function(fun));
        }
        if [r, s, t].iter().any(|&am| am == image::WrapMode::Border) {
            descriptor.set_border_color(conv::map_border_color(info.border));
        }

        if caps.argument_buffers {
            descriptor.set_support_argument_buffers(true);
        }

        Some(descriptor)
    }
}
974
975 impl hal::device::Device<Backend> for Device {
create_command_pool( &self, _family: QueueFamilyId, _flags: CommandPoolCreateFlags, ) -> Result<command::CommandPool, d::OutOfMemory>976 unsafe fn create_command_pool(
977 &self,
978 _family: QueueFamilyId,
979 _flags: CommandPoolCreateFlags,
980 ) -> Result<command::CommandPool, d::OutOfMemory> {
981 Ok(command::CommandPool::new(
982 &self.shared,
983 self.online_recording.clone(),
984 ))
985 }
986
    /// Destroys a command pool.
    ///
    /// Explicitly resets the pool first so any still-allocated command
    /// buffers are recycled before the pool value is dropped; `false`
    /// presumably means "do not release resources" — TODO confirm against
    /// `hal::pool::CommandPool::reset` semantics.
    unsafe fn destroy_command_pool(&self, mut pool: command::CommandPool) {
        use hal::pool::CommandPool as _;
        pool.reset(false);
    }
991
    /// Creates a render-pass description.
    ///
    /// Metal has no first-class render-pass object, so this only gathers
    /// per-subpass attachment information (`n::RenderPass`) for command
    /// recording to use later, and derives LOAD/STORE operations per subpass.
    /// Subpass dependencies are not needed by this backend and are ignored.
    unsafe fn create_render_pass<'a, Ia, Is, Id>(
        &self,
        attachments: Ia,
        subpasses: Is,
        _dependencies: Id,
    ) -> Result<n::RenderPass, d::OutOfMemory>
    where
        Ia: Iterator<Item = pass::Attachment>,
        Is: Iterator<Item = pass::SubpassDesc<'a>>,
    {
        let attachments: Vec<pass::Attachment> = attachments.collect();

        // Resolve each subpass into per-attachment info with Metal pixel
        // formats. `ops` starts empty and is filled in by the LOAD/STORE
        // passes below.
        let mut subpasses: Vec<n::Subpass> = subpasses
            .map(|sub| {
                let mut colors: ArrayVec<[_; MAX_COLOR_ATTACHMENTS]> = sub
                    .colors
                    .iter()
                    .map(|&(id, _)| {
                        let hal_format = attachments[id].format.expect("No format!");
                        n::AttachmentInfo {
                            id,
                            resolve_id: None,
                            ops: n::AttachmentOps::empty(),
                            format: self
                                .shared
                                .private_caps
                                .map_format(hal_format)
                                .expect("Unable to map color format!"),
                            channel: Channel::from(hal_format.base_format().1),
                        }
                    })
                    .collect();
                // Pair each color attachment with its resolve target, if any.
                for (color, &(resolve_id, _)) in colors.iter_mut().zip(sub.resolves.iter()) {
                    if resolve_id != pass::ATTACHMENT_UNUSED {
                        color.resolve_id = Some(resolve_id);
                    }
                }
                let depth_stencil = sub.depth_stencil.map(|&(id, _)| {
                    let hal_format = attachments[id].format.expect("No format!");
                    n::AttachmentInfo {
                        id,
                        resolve_id: None,
                        ops: n::AttachmentOps::empty(),
                        format: self
                            .shared
                            .private_caps
                            .map_format(hal_format)
                            .expect("Unable to map depth-stencil format!"),
                        // Depth-stencil clear values are always floats here.
                        channel: Channel::Float,
                    }
                });

                // A subpass's sample count is the maximum over its color and
                // depth-stencil attachments, defaulting to 1 if it has none.
                let samples = colors
                    .iter()
                    .chain(depth_stencil.as_ref())
                    .map(|at_info| attachments[at_info.id].samples)
                    .max()
                    .unwrap_or(1);

                n::Subpass {
                    attachments: n::SubpassData {
                        colors,
                        depth_stencil,
                    },
                    inputs: sub.inputs.iter().map(|&(id, _)| id).collect(),
                    samples,
                }
            })
            .collect();

        // sprinkle load operations
        // an attachment receives LOAD flag on a subpass if it's the first sub-pass that uses it
        // Bit N of `use_mask` tracks whether attachment N has been visited;
        // NOTE(review): this assumes attachment ids are < 64 — TODO confirm.
        let mut use_mask = 0u64;
        for sub in subpasses.iter_mut() {
            for at in sub.attachments.colors.iter_mut() {
                if use_mask & 1 << at.id == 0 {
                    at.ops |= n::AttachmentOps::LOAD;
                    use_mask ^= 1 << at.id;
                }
            }
            if let Some(ref mut at) = sub.attachments.depth_stencil {
                if use_mask & 1 << at.id == 0 {
                    at.ops |= n::AttachmentOps::LOAD;
                    use_mask ^= 1 << at.id;
                }
            }
        }
        // sprinkle store operations
        // an attachment receives STORE flag on a subpass if it's the last sub-pass that uses it
        // (walks the subpasses in reverse, clearing the bits set above).
        for sub in subpasses.iter_mut().rev() {
            for at in sub.attachments.colors.iter_mut() {
                if use_mask & 1 << at.id != 0 {
                    at.ops |= n::AttachmentOps::STORE;
                    use_mask ^= 1 << at.id;
                }
            }
            if let Some(ref mut at) = sub.attachments.depth_stencil {
                if use_mask & 1 << at.id != 0 {
                    at.ops |= n::AttachmentOps::STORE;
                    use_mask ^= 1 << at.id;
                }
            }
        }

        Ok(n::RenderPass {
            attachments,
            subpasses,
            name: String::new(),
        })
    }
1102
    /// Builds a pipeline layout by assigning Metal resource slots (buffer,
    /// texture, sampler indices) to every push-constant range and descriptor
    /// binding, independently per shader stage (vertex, fragment, compute).
    ///
    /// The produced `n::PipelineLayout` carries:
    /// - the naga MSL `binding_map` (and, with the `cross` feature, the
    ///   matching SPIRV-Cross resource overrides) used at shader translation,
    /// - per-set resource-counter offsets for descriptor binding at draw time,
    /// - per-stage push-constant buffer assignments and total counts.
    ///
    /// Returns `Err(OutOfMemory::Host)` when a stage's resource counts
    /// exceed the device's per-stage limits.
    unsafe fn create_pipeline_layout<'a, Is, Ic>(
        &self,
        set_layouts: Is,
        push_constant_ranges: Ic,
    ) -> Result<n::PipelineLayout, d::OutOfMemory>
    where
        Is: Iterator<Item = &'a n::DescriptorSetLayout>,
        Ic: Iterator<Item = (pso::ShaderStageFlags, Range<u32>)>,
    {
        // Per-stage bookkeeping while walking the layout.
        #[derive(Debug)]
        struct StageInfo {
            stage: naga::ShaderStage,
            // Running count of assigned buffer/texture/sampler slots.
            counters: n::ResourceData<ResourceIndex>,
            // Buffer slot reserved for push constants, if any are used.
            push_constant_buffer: Option<ResourceIndex>,
            // Buffer slot reserved for the runtime buffer-sizes array.
            sizes_buffer: Option<ResourceIndex>,
            // Number of sized-buffer bindings visible to this stage.
            sizes_count: u8,
        }
        let mut stage_infos = [
            StageInfo {
                stage: naga::ShaderStage::Vertex,
                counters: n::ResourceData::new(),
                push_constant_buffer: None,
                sizes_buffer: None,
                sizes_count: 0,
            },
            StageInfo {
                stage: naga::ShaderStage::Fragment,
                counters: n::ResourceData::new(),
                push_constant_buffer: None,
                sizes_buffer: None,
                sizes_count: 0,
            },
            StageInfo {
                stage: naga::ShaderStage::Compute,
                counters: n::ResourceData::new(),
                push_constant_buffer: None,
                sizes_buffer: None,
                sizes_count: 0,
            },
        ];
        let mut binding_map = BTreeMap::default();
        let mut argument_buffer_bindings = FastHashMap::default();
        let mut inline_samplers = Vec::new();
        #[cfg(feature = "cross")]
        let mut cross_const_samplers = BTreeMap::new();
        let mut infos = Vec::new();

        // First, place the push constants
        // `pc_limits[i]` is the push-constant size (in 4-byte units) needed
        // by stage `i` — the max `range.end` over all ranges touching it.
        let mut pc_limits = [0u32; 3];
        for (flags, range) in push_constant_ranges {
            for (limit, info) in pc_limits.iter_mut().zip(&stage_infos) {
                if flags.contains(info.stage.into()) {
                    debug_assert_eq!(range.end % 4, 0);
                    *limit = (range.end / 4).max(*limit);
                }
            }
        }

        const LIMIT_MASK: u32 = 3;
        // round up the limits alignment to 4, so that it matches MTL compiler logic
        //TODO: figure out what and how exactly does the alignment. Clearly, it's not
        // straightforward, given that value of 2 stays non-aligned.
        for limit in &mut pc_limits {
            if *limit > LIMIT_MASK {
                *limit = (*limit + LIMIT_MASK) & !LIMIT_MASK;
            }
        }

        for (limit, info) in pc_limits.iter().zip(stage_infos.iter_mut()) {
            // handle the push constant buffer assignment and shader overrides
            if *limit != 0 {
                info.push_constant_buffer = Some(info.counters.buffers);
                info.counters.buffers += 1;
            }
        }

        // Second, place the descripted resources
        for (set_index, set_layout) in set_layouts.enumerate() {
            // remember where the resources for this set start at each shader stage
            let mut dynamic_buffers = Vec::new();
            let mut sized_buffer_bindings = Vec::new();
            let offsets = n::MultiStageResourceCounters {
                vs: stage_infos[0].counters.clone(),
                ps: stage_infos[1].counters.clone(),
                cs: stage_infos[2].counters.clone(),
            };

            match *set_layout {
                n::DescriptorSetLayout::Emulated {
                    layouts: ref desc_layouts,
                    ref immutable_samplers,
                    ..
                } => {
                    // With the `cross` feature, immutable samplers are passed
                    // to SPIRV-Cross as constexpr samplers by (set, binding).
                    #[cfg(feature = "cross")]
                    for (&binding, immutable_sampler) in immutable_samplers.iter() {
                        //TODO: array support?
                        cross_const_samplers.insert(
                            spirv_cross::msl::SamplerLocation {
                                desc_set: set_index as u32,
                                binding,
                            },
                            immutable_sampler.cross_data.clone(),
                        );
                    }
                    for layout in desc_layouts.iter() {
                        // Sized buffers need their runtime length exposed to
                        // the shader; count one slot per visible stage.
                        if layout.content.contains(n::DescriptorContent::SIZED_BUFFER) {
                            sized_buffer_bindings.push((layout.binding, layout.stages));
                            if layout.stages.contains(pso::ShaderStageFlags::VERTEX) {
                                stage_infos[0].sizes_count += 1;
                            }
                            if layout.stages.contains(pso::ShaderStageFlags::FRAGMENT) {
                                stage_infos[1].sizes_count += 1;
                            }
                            if layout.stages.contains(pso::ShaderStageFlags::COMPUTE) {
                                stage_infos[2].sizes_count += 1;
                            }
                        }

                        // Dynamic buffers record the per-stage slot they were
                        // assigned (or `!0` for stages that can't see them),
                        // so dynamic offsets can be applied at bind time.
                        if layout
                            .content
                            .contains(n::DescriptorContent::DYNAMIC_BUFFER)
                        {
                            dynamic_buffers.alloc().init(n::MultiStageData {
                                vs: if layout.stages.contains(pso::ShaderStageFlags::VERTEX) {
                                    stage_infos[0].counters.buffers
                                } else {
                                    !0
                                },
                                ps: if layout.stages.contains(pso::ShaderStageFlags::FRAGMENT) {
                                    stage_infos[1].counters.buffers
                                } else {
                                    !0
                                },
                                cs: if layout.stages.contains(pso::ShaderStageFlags::COMPUTE) {
                                    stage_infos[2].counters.buffers
                                } else {
                                    !0
                                },
                            });
                        }

                        // Assign concrete Metal slots for this binding in
                        // every stage that can see it.
                        for info in stage_infos.iter_mut() {
                            if !layout.stages.contains(info.stage.into()) {
                                continue;
                            }
                            let target = naga::back::msl::BindTarget {
                                buffer: if layout.content.contains(n::DescriptorContent::BUFFER) {
                                    Some(info.counters.buffers as _)
                                } else {
                                    None
                                },
                                texture: if layout.content.contains(n::DescriptorContent::TEXTURE) {
                                    Some(info.counters.textures as _)
                                } else {
                                    None
                                },
                                sampler: if layout
                                    .content
                                    .contains(n::DescriptorContent::IMMUTABLE_SAMPLER)
                                {
                                    // Immutable samplers are inlined into the
                                    // generated MSL rather than bound.
                                    let immutable_sampler = &immutable_samplers[&layout.binding];
                                    let handle = inline_samplers.len()
                                        as naga::back::msl::InlineSamplerIndex;
                                    inline_samplers.push(immutable_sampler.data.clone());
                                    Some(naga::back::msl::BindSamplerTarget::Inline(handle))
                                } else if layout.content.contains(n::DescriptorContent::SAMPLER) {
                                    Some(naga::back::msl::BindSamplerTarget::Resource(
                                        info.counters.samplers as _,
                                    ))
                                } else {
                                    None
                                },
                                mutable: layout.content.contains(n::DescriptorContent::WRITABLE),
                            };
                            info.counters.add(layout.content);
                            // Only the first element of a binding array gets
                            // an entry in the shader binding map.
                            if layout.array_index == 0 {
                                let source = naga::back::msl::BindSource {
                                    stage: info.stage,
                                    group: set_index as _,
                                    binding: layout.binding,
                                };
                                binding_map.insert(source, target);
                            }
                        }
                    }
                }
                n::DescriptorSetLayout::ArgumentBuffer {
                    bindings: _,
                    stage_flags,
                    ..
                } => {
                    // An argument-buffer set consumes a single buffer slot per
                    // visible stage; the whole set is bound as one buffer.
                    for info in stage_infos.iter_mut() {
                        if !stage_flags.contains(info.stage.into()) {
                            continue;
                        }
                        //TODO: mark `bindings` as belonging to the argument buffer
                        argument_buffer_bindings
                            .insert((info.stage, set_index as u32), info.counters.buffers);
                        info.counters.buffers += 1;
                    }
                }
            }

            infos.alloc().init(n::DescriptorSetInfo {
                offsets,
                dynamic_buffers,
                sized_buffer_bindings,
            });
        }

        // Finally, make sure we fit the limits
        for info in stage_infos.iter_mut() {
            // handle the sizes buffer assignment and shader overrides
            if info.sizes_count != 0 {
                info.sizes_buffer = Some(info.counters.buffers);
                info.counters.buffers += 1;
            }
            if info.counters.buffers > self.shared.private_caps.max_buffers_per_stage
                || info.counters.textures > self.shared.private_caps.max_textures_per_stage
                || info.counters.samplers > self.shared.private_caps.max_samplers_per_stage
            {
                log::error!("Resource limit exceeded: {:?}", info);
                return Err(d::OutOfMemory::Host);
            }
        }

        // Mirror the slot assignments into SPIRV-Cross compiler options so
        // both translation paths agree on resource locations.
        #[cfg(feature = "cross")]
        let spirv_cross_options = {
            use spirv_cross::msl;
            const PUSH_CONSTANTS_DESC_SET: u32 = !0;
            const PUSH_CONSTANTS_DESC_BINDING: u32 = 0;

            let mut compiler_options = msl::CompilerOptions::default();
            compiler_options.version = match self.shared.private_caps.msl_version {
                MTLLanguageVersion::V1_0 => msl::Version::V1_0,
                MTLLanguageVersion::V1_1 => msl::Version::V1_1,
                MTLLanguageVersion::V1_2 => msl::Version::V1_2,
                MTLLanguageVersion::V2_0 => msl::Version::V2_0,
                MTLLanguageVersion::V2_1 => msl::Version::V2_1,
                MTLLanguageVersion::V2_2 => msl::Version::V2_2,
                MTLLanguageVersion::V2_3 => msl::Version::V2_3,
            };
            compiler_options.enable_point_size_builtin = false;
            compiler_options.vertex.invert_y = !self.features.contains(hal::Features::NDC_Y_UP);
            // populate resource overrides
            for (source, target) in binding_map.iter() {
                compiler_options.resource_binding_overrides.insert(
                    msl::ResourceBindingLocation {
                        stage: conv::map_naga_stage_to_cross(source.stage),
                        desc_set: source.group,
                        binding: source.binding,
                    },
                    msl::ResourceBinding {
                        buffer_id: target.buffer.map_or(!0, |id| id as u32),
                        texture_id: target.texture.map_or(!0, |id| id as u32),
                        sampler_id: match target.sampler {
                            Some(naga::back::msl::BindSamplerTarget::Resource(id)) => id as u32,
                            _ => !0,
                        },
                        count: 0,
                    },
                );
            }
            // argument buffers
            for ((stage, desc_set), buffer_id) in argument_buffer_bindings {
                compiler_options.resource_binding_overrides.insert(
                    msl::ResourceBindingLocation {
                        stage: conv::map_naga_stage_to_cross(stage),
                        desc_set,
                        binding: msl::ARGUMENT_BUFFER_BINDING,
                    },
                    msl::ResourceBinding {
                        buffer_id,
                        texture_id: !0,
                        sampler_id: !0,
                        count: 0,
                    },
                );
                //TODO: assign argument buffer locations
            }
            // push constants
            for info in stage_infos.iter() {
                let buffer_id = match info.push_constant_buffer {
                    Some(id) => id,
                    None => continue,
                };
                compiler_options.resource_binding_overrides.insert(
                    msl::ResourceBindingLocation {
                        stage: conv::map_naga_stage_to_cross(info.stage),
                        desc_set: PUSH_CONSTANTS_DESC_SET,
                        binding: PUSH_CONSTANTS_DESC_BINDING,
                    },
                    msl::ResourceBinding {
                        buffer_id,
                        texture_id: !0,
                        sampler_id: !0,
                        count: 0,
                    },
                );
            }
            // other properties
            compiler_options.const_samplers = cross_const_samplers;
            compiler_options.enable_argument_buffers = self.shared.private_caps.argument_buffers;
            compiler_options.force_zero_initialized_variables = true;
            compiler_options.force_native_arrays = true;

            // NOTE(review): `compiler_options_point` is built but never used
            // (only `compiler_options` is returned) — looks vestigial; confirm
            // before removing.
            let mut compiler_options_point = compiler_options.clone();
            compiler_options_point.enable_point_size_builtin = true;
            compiler_options
        };

        let naga_options = naga::back::msl::Options {
            lang_version: match self.shared.private_caps.msl_version {
                MTLLanguageVersion::V1_0 => (1, 0),
                MTLLanguageVersion::V1_1 => (1, 1),
                MTLLanguageVersion::V1_2 => (1, 2),
                MTLLanguageVersion::V2_0 => (2, 0),
                MTLLanguageVersion::V2_1 => (2, 1),
                MTLLanguageVersion::V2_2 => (2, 2),
                MTLLanguageVersion::V2_3 => (2, 3),
            },
            binding_map,
            inline_samplers,
            spirv_cross_compatibility: cfg!(feature = "cross"),
            fake_missing_bindings: false,
            per_stage_map: naga::back::msl::PerStageMap {
                vs: naga::back::msl::PerStageResources {
                    push_constant_buffer: stage_infos[0]
                        .push_constant_buffer
                        .map(|buffer_index| buffer_index as naga::back::msl::Slot),
                    sizes_buffer: stage_infos[0]
                        .sizes_buffer
                        .map(|buffer_index| buffer_index as naga::back::msl::Slot),
                },
                fs: naga::back::msl::PerStageResources {
                    push_constant_buffer: stage_infos[1]
                        .push_constant_buffer
                        .map(|buffer_index| buffer_index as naga::back::msl::Slot),
                    sizes_buffer: stage_infos[1]
                        .sizes_buffer
                        .map(|buffer_index| buffer_index as naga::back::msl::Slot),
                },
                cs: naga::back::msl::PerStageResources {
                    push_constant_buffer: stage_infos[2]
                        .push_constant_buffer
                        .map(|buffer_index| buffer_index as naga::back::msl::Slot),
                    sizes_buffer: stage_infos[2]
                        .sizes_buffer
                        .map(|buffer_index| buffer_index as naga::back::msl::Slot),
                },
            },
        };

        Ok(n::PipelineLayout {
            #[cfg(feature = "cross")]
            spirv_cross_options,
            naga_options,
            infos,
            total: n::MultiStageResourceCounters {
                vs: stage_infos[0].counters.clone(),
                ps: stage_infos[1].counters.clone(),
                cs: stage_infos[2].counters.clone(),
            },
            push_constants: n::MultiStageData {
                vs: stage_infos[0]
                    .push_constant_buffer
                    .map(|buffer_index| n::PushConstantInfo {
                        count: pc_limits[0],
                        buffer_index,
                    }),
                ps: stage_infos[1]
                    .push_constant_buffer
                    .map(|buffer_index| n::PushConstantInfo {
                        count: pc_limits[1],
                        buffer_index,
                    }),
                cs: stage_infos[2]
                    .push_constant_buffer
                    .map(|buffer_index| n::PushConstantInfo {
                        count: pc_limits[2],
                        buffer_index,
                    }),
            },
            total_push_constants: pc_limits[0].max(pc_limits[1]).max(pc_limits[2]),
        })
    }
1489
    /// No-op pipeline-cache creation used when the `pipeline-cache` feature
    /// is disabled; in that configuration the cache type is a unit value and
    /// any provided data is ignored.
    #[cfg(not(feature = "pipeline-cache"))]
    unsafe fn create_pipeline_cache(
        &self,
        _data: Option<&[u8]>,
    ) -> Result<n::PipelineCache, d::OutOfMemory> {
        Ok(())
    }
1497
    /// Creates a pipeline cache, optionally seeded from previously serialized
    /// data (see `get_pipeline_cache_data`).
    ///
    /// The cache has two parts: a Metal binary archive (only when the device
    /// supports binary archives) and a SPIR-V-to-MSL translation cache.
    /// NOTE(review): the `bincode::deserialize(..).unwrap()` below panics on
    /// corrupted/incompatible cache data instead of falling back to an empty
    /// cache — confirm whether that is intended.
    #[cfg(feature = "pipeline-cache")]
    unsafe fn create_pipeline_cache(
        &self,
        data: Option<&[u8]>,
    ) -> Result<n::PipelineCache, d::OutOfMemory> {
        let device = self.shared.device.lock();

        // Builds the optional binary archive; `data` is the serialized
        // archive blob (may be empty for a fresh cache).
        let create_binary_archive = |data: &[u8]| {
            if self.shared.private_caps.supports_binary_archives {
                let descriptor = metal::BinaryArchiveDescriptor::new();

                // We need to keep the temp file alive so that it doesn't get deleted until after a
                // binary archive has been created.
                let _temp_file = if !data.is_empty() {
                    // It would be nice to use a `data:text/plain;base64` url here and just pass in a
                    // base64-encoded version of the data, but metal validation doesn't like that:
                    // -[MTLDebugDevice newBinaryArchiveWithDescriptor:error:]:1046: failed assertion `url, if not nil, must be a file URL.'

                    let temp_file = tempfile::NamedTempFile::new().unwrap();
                    temp_file.as_file().write_all(&data).unwrap();

                    let url = metal::URL::new_with_string(&format!(
                        "file://{}",
                        temp_file.path().display()
                    ));
                    descriptor.set_url(&url);

                    Some(temp_file)
                } else {
                    None
                };

                Ok(Some(pipeline_cache::BinaryArchive {
                    inner: device
                        .new_binary_archive_with_descriptor(&descriptor)
                        .map_err(|_| d::OutOfMemory::Device)?,
                    // Tracks whether the archive has any entries yet; an empty
                    // archive must not be serialized (see get_pipeline_cache_data).
                    is_empty: AtomicBool::new(data.is_empty()),
                }))
            } else {
                Ok(None)
            }
        };

        if let Some(data) = data.filter(|data| !data.is_empty()) {
            // Restore both halves of the cache from the serialized blob.
            let pipeline_cache: pipeline_cache::SerializablePipelineCache =
                bincode::deserialize(data).unwrap();

            Ok(n::PipelineCache {
                binary_archive: create_binary_archive(&pipeline_cache.binary_archive)?,
                spv_to_msl: pipeline_cache::load_spv_to_msl_cache(pipeline_cache.spv_to_msl),
            })
        } else {
            // No seed data: start with an empty archive and translation cache.
            Ok(n::PipelineCache {
                binary_archive: create_binary_archive(&[])?,
                spv_to_msl: Default::default(),
            })
        }
    }
1556
    /// Without the `pipeline-cache` feature there is nothing to serialize,
    /// so the cache data is always empty.
    #[cfg(not(feature = "pipeline-cache"))]
    unsafe fn get_pipeline_cache_data(
        &self,
        _cache: &n::PipelineCache,
    ) -> Result<Vec<u8>, d::OutOfMemory> {
        Ok(Vec::new())
    }
1564
    /// Serializes the pipeline cache (binary archive + SPIR-V-to-MSL cache)
    /// into a byte blob that `create_pipeline_cache` can later restore.
    ///
    /// The Metal binary archive can only be serialized to a file URL, so it
    /// goes through a temporary file that is read back into memory.
    #[cfg(feature = "pipeline-cache")]
    unsafe fn get_pipeline_cache_data(
        &self,
        cache: &n::PipelineCache,
    ) -> Result<Vec<u8>, d::OutOfMemory> {
        // Produces the serialized bytes of the binary archive, or an empty
        // vec when there is no archive / nothing has been recorded into it.
        let binary_archive = || {
            let binary_archive = match cache.binary_archive {
                Some(ref binary_archive) => binary_archive,
                None => return Ok(Vec::new()),
            };

            // Without this, we get an extremely vague "Serialization of binaries to file failed"
            // error when serializing an empty binary archive.
            if binary_archive.is_empty.load(Ordering::Relaxed) {
                return Ok(Vec::new());
            }

            let temp_path = tempfile::NamedTempFile::new().unwrap().into_temp_path();
            let tmp_file_url =
                metal::URL::new_with_string(&format!("file://{}", temp_path.display()));

            // NOTE(review): the unwraps below panic on I/O or Metal
            // serialization failure rather than reporting an error.
            binary_archive
                .inner
                .serialize_to_url(&tmp_file_url)
                .unwrap();

            let bytes = std::fs::read(&temp_path).unwrap();
            Ok(bytes)
        };

        Ok(
            bincode::serialize(&pipeline_cache::SerializablePipelineCache {
                binary_archive: &binary_archive()?,
                spv_to_msl: pipeline_cache::serialize_spv_to_msl_cache(&cache.spv_to_msl),
            })
            .unwrap(),
        )
    }
1603
    /// Destroys a pipeline cache; dropping the value releases all of its
    /// resources, so no explicit work is needed.
    unsafe fn destroy_pipeline_cache(&self, _cache: n::PipelineCache) {
        //drop
    }
1607
    /// Merging pipeline caches is not implemented on the Metal backend.
    ///
    /// Logs a warning and reports success so callers can proceed — the target
    /// cache is simply left unchanged.
    unsafe fn merge_pipeline_caches<'a, I>(
        &self,
        _target: &mut n::PipelineCache,
        _sources: I,
    ) -> Result<(), d::OutOfMemory>
    where
        I: Iterator<Item = &'a n::PipelineCache>,
    {
        warn!("`merge_pipeline_caches` is not currently implemented on the Metal backend.");
        Ok(())
    }
1619
create_graphics_pipeline<'a>( &self, pipeline_desc: &pso::GraphicsPipelineDesc<'a, Backend>, cache: Option<&n::PipelineCache>, ) -> Result<n::GraphicsPipeline, pso::CreationError>1620 unsafe fn create_graphics_pipeline<'a>(
1621 &self,
1622 pipeline_desc: &pso::GraphicsPipelineDesc<'a, Backend>,
1623 cache: Option<&n::PipelineCache>,
1624 ) -> Result<n::GraphicsPipeline, pso::CreationError> {
1625 profiling::scope!("create_graphics_pipeline");
1626 trace!("create_graphics_pipeline {:#?}", pipeline_desc);
1627
1628 let pipeline = metal::RenderPipelineDescriptor::new();
1629 let pipeline_layout = &pipeline_desc.layout;
1630 let (rp_attachments, subpass) = {
1631 let pass::Subpass { main_pass, index } = pipeline_desc.subpass;
1632 (&main_pass.attachments, &main_pass.subpasses[index as usize])
1633 };
1634
1635 let (desc_vertex_buffers, attributes, input_assembler, vs_ep) =
1636 match pipeline_desc.primitive_assembler {
1637 pso::PrimitiveAssemblerDesc::Vertex {
1638 tessellation: Some(_),
1639 ..
1640 } => {
1641 error!("Tessellation is not supported");
1642 return Err(pso::CreationError::UnsupportedPipeline);
1643 }
1644 pso::PrimitiveAssemblerDesc::Vertex {
1645 geometry: Some(_), ..
1646 } => {
1647 error!("Geometry shader is not supported");
1648 return Err(pso::CreationError::UnsupportedPipeline);
1649 }
1650 pso::PrimitiveAssemblerDesc::Mesh { .. } => {
1651 error!("Mesh shader is not supported");
1652 return Err(pso::CreationError::UnsupportedPipeline);
1653 }
1654 pso::PrimitiveAssemblerDesc::Vertex {
1655 buffers,
1656 attributes,
1657 ref input_assembler,
1658 ref vertex,
1659 tessellation: _,
1660 geometry: _,
1661 } => (buffers, attributes, input_assembler, vertex),
1662 };
1663
1664 let (primitive_class, primitive_type) = match input_assembler.primitive {
1665 pso::Primitive::PointList => {
1666 (MTLPrimitiveTopologyClass::Point, MTLPrimitiveType::Point)
1667 }
1668 pso::Primitive::LineList => (MTLPrimitiveTopologyClass::Line, MTLPrimitiveType::Line),
1669 pso::Primitive::LineStrip => {
1670 (MTLPrimitiveTopologyClass::Line, MTLPrimitiveType::LineStrip)
1671 }
1672 pso::Primitive::TriangleList => (
1673 MTLPrimitiveTopologyClass::Triangle,
1674 MTLPrimitiveType::Triangle,
1675 ),
1676 pso::Primitive::TriangleStrip => (
1677 MTLPrimitiveTopologyClass::Triangle,
1678 MTLPrimitiveType::TriangleStrip,
1679 ),
1680 pso::Primitive::PatchList(_) => (
1681 MTLPrimitiveTopologyClass::Unspecified,
1682 MTLPrimitiveType::Point,
1683 ),
1684 };
1685 if self.shared.private_caps.layered_rendering {
1686 pipeline.set_input_primitive_topology(primitive_class);
1687 }
1688
1689 // Vertex shader
1690 let vs = self.load_shader(
1691 vs_ep,
1692 pipeline_layout,
1693 primitive_class,
1694 cache,
1695 naga::ShaderStage::Vertex,
1696 )?;
1697
1698 pipeline.set_vertex_function(Some(&vs.function));
1699
1700 // Fragment shader
1701 let fs = match pipeline_desc.fragment {
1702 Some(ref ep) => Some(self.load_shader(
1703 ep,
1704 pipeline_layout,
1705 primitive_class,
1706 cache,
1707 naga::ShaderStage::Fragment,
1708 )?),
1709 None => {
1710 // TODO: This is a workaround for what appears to be a Metal validation bug
1711 // A pixel format is required even though no attachments are provided
1712 if subpass.attachments.colors.is_empty()
1713 && subpass.attachments.depth_stencil.is_none()
1714 {
1715 pipeline.set_depth_attachment_pixel_format(metal::MTLPixelFormat::Depth32Float);
1716 }
1717 None
1718 }
1719 };
1720
1721 if let Some(ref compiled) = fs {
1722 pipeline.set_fragment_function(Some(&compiled.function));
1723 }
1724 pipeline.set_rasterization_enabled(vs.rasterizing);
1725
1726 // Assign target formats
1727 let blend_targets = pipeline_desc
1728 .blender
1729 .targets
1730 .iter()
1731 .chain(iter::repeat(&pso::ColorBlendDesc::EMPTY));
1732 for (i, (at, color_desc)) in subpass
1733 .attachments
1734 .colors
1735 .iter()
1736 .zip(blend_targets)
1737 .enumerate()
1738 {
1739 let desc = pipeline
1740 .color_attachments()
1741 .object_at(i as u64)
1742 .expect("too many color attachments");
1743
1744 desc.set_pixel_format(at.format);
1745 desc.set_write_mask(conv::map_write_mask(color_desc.mask));
1746
1747 if let Some(ref blend) = color_desc.blend {
1748 desc.set_blending_enabled(true);
1749 let (color_op, color_src, color_dst) = conv::map_blend_op(blend.color);
1750 let (alpha_op, alpha_src, alpha_dst) = conv::map_blend_op(blend.alpha);
1751
1752 desc.set_rgb_blend_operation(color_op);
1753 desc.set_source_rgb_blend_factor(color_src);
1754 desc.set_destination_rgb_blend_factor(color_dst);
1755
1756 desc.set_alpha_blend_operation(alpha_op);
1757 desc.set_source_alpha_blend_factor(alpha_src);
1758 desc.set_destination_alpha_blend_factor(alpha_dst);
1759 }
1760 }
1761 if let Some(ref at) = subpass.attachments.depth_stencil {
1762 let orig_format = rp_attachments[at.id].format.unwrap();
1763 if orig_format.is_depth() {
1764 pipeline.set_depth_attachment_pixel_format(at.format);
1765 }
1766 if orig_format.is_stencil() {
1767 pipeline.set_stencil_attachment_pixel_format(at.format);
1768 }
1769 }
1770
1771 // Vertex buffers
1772 let vertex_descriptor = metal::VertexDescriptor::new();
1773 let mut vertex_buffers: n::VertexBufferVec = Vec::new();
1774 trace!("Vertex attribute remapping started");
1775
1776 for &pso::AttributeDesc {
1777 location,
1778 binding,
1779 element,
1780 } in attributes
1781 {
1782 let original = desc_vertex_buffers
1783 .iter()
1784 .find(|vb| vb.binding == binding)
1785 .expect("no associated vertex buffer found");
1786 // handle wrapping offsets
1787 let elem_size = element.format.surface_desc().bits as pso::ElemOffset / 8;
1788 let (cut_offset, base_offset) =
1789 if original.stride == 0 || element.offset + elem_size <= original.stride {
1790 (element.offset, 0)
1791 } else {
1792 let remainder = element.offset % original.stride;
1793 if remainder + elem_size <= original.stride {
1794 (remainder, element.offset - remainder)
1795 } else {
1796 (0, element.offset)
1797 }
1798 };
1799 let relative_index = vertex_buffers
1800 .iter()
1801 .position(|(ref vb, offset)| vb.binding == binding && base_offset == *offset)
1802 .unwrap_or_else(|| {
1803 vertex_buffers.alloc().init((original.clone(), base_offset));
1804 vertex_buffers.len() - 1
1805 });
1806 let mtl_buffer_index = self.shared.private_caps.max_buffers_per_stage
1807 - 1
1808 - (relative_index as ResourceIndex);
1809 if mtl_buffer_index < pipeline_layout.total.vs.buffers {
1810 error!("Attribute offset {} exceeds the stride {}, and there is no room for replacement.",
1811 element.offset, original.stride);
1812 return Err(pso::CreationError::Other);
1813 }
1814 trace!("\tAttribute[{}] is mapped to vertex buffer[{}] with binding {} and offsets {} + {}",
1815 location, binding, mtl_buffer_index, base_offset, cut_offset);
1816 // pass the refined data to Metal
1817 let mtl_attribute_desc = vertex_descriptor
1818 .attributes()
1819 .object_at(location as u64)
1820 .expect("too many vertex attributes");
1821 let mtl_vertex_format =
1822 conv::map_vertex_format(element.format).expect("unsupported vertex format");
1823 mtl_attribute_desc.set_format(mtl_vertex_format);
1824 mtl_attribute_desc.set_buffer_index(mtl_buffer_index as _);
1825 mtl_attribute_desc.set_offset(cut_offset as _);
1826 }
1827
1828 for (i, (vb, _)) in vertex_buffers.iter().enumerate() {
1829 let mtl_buffer_desc = vertex_descriptor
1830 .layouts()
1831 .object_at(self.shared.private_caps.max_buffers_per_stage as u64 - 1 - i as u64)
1832 .expect("too many vertex descriptor layouts");
1833 if vb.stride % STRIDE_GRANULARITY != 0 {
1834 error!(
1835 "Stride ({}) must be a multiple of {}",
1836 vb.stride, STRIDE_GRANULARITY
1837 );
1838 return Err(pso::CreationError::Other);
1839 }
1840 if vb.stride != 0 {
1841 mtl_buffer_desc.set_stride(vb.stride as u64);
1842 match vb.rate {
1843 VertexInputRate::Vertex => {
1844 mtl_buffer_desc.set_step_function(MTLVertexStepFunction::PerVertex);
1845 }
1846 VertexInputRate::Instance(divisor) => {
1847 mtl_buffer_desc.set_step_function(MTLVertexStepFunction::PerInstance);
1848 mtl_buffer_desc.set_step_rate(divisor as u64);
1849 }
1850 }
1851 } else {
1852 mtl_buffer_desc.set_stride(256); // big enough to fit all the elements
1853 mtl_buffer_desc.set_step_function(MTLVertexStepFunction::PerInstance);
1854 mtl_buffer_desc.set_step_rate(!0);
1855 }
1856 }
1857 if !vertex_buffers.is_empty() {
1858 pipeline.set_vertex_descriptor(Some(&vertex_descriptor));
1859 }
1860
1861 if let pso::State::Static(w) = pipeline_desc.rasterizer.line_width {
1862 if w != 1.0 {
1863 warn!("Unsupported line width: {:?}", w);
1864 }
1865 }
1866
1867 let rasterizer_state = Some(n::RasterizerState {
1868 front_winding: conv::map_winding(pipeline_desc.rasterizer.front_face),
1869 fill_mode: conv::map_polygon_mode(pipeline_desc.rasterizer.polygon_mode),
1870 cull_mode: match conv::map_cull_face(pipeline_desc.rasterizer.cull_face) {
1871 Some(mode) => mode,
1872 None => {
1873 //TODO - Metal validation fails with
1874 // RasterizationEnabled is false but the vertex shader's return type is not void
1875 error!("Culling both sides is not yet supported");
1876 //pipeline.set_rasterization_enabled(false);
1877 metal::MTLCullMode::None
1878 }
1879 },
1880 depth_clip: if self.shared.private_caps.depth_clip_mode {
1881 Some(if pipeline_desc.rasterizer.depth_clamping {
1882 metal::MTLDepthClipMode::Clamp
1883 } else {
1884 metal::MTLDepthClipMode::Clip
1885 })
1886 } else {
1887 None
1888 },
1889 });
1890 let depth_bias = pipeline_desc
1891 .rasterizer
1892 .depth_bias
1893 .unwrap_or(pso::State::Static(pso::DepthBias::default()));
1894
1895 // prepare the depth-stencil state now
1896 let device = self.shared.device.lock();
1897 self.shared
1898 .service_pipes
1899 .depth_stencil_states
1900 .prepare(&pipeline_desc.depth_stencil, &*device);
1901
1902 let samples = if let Some(multisampling) = &pipeline_desc.multisampling {
1903 pipeline.set_sample_count(multisampling.rasterization_samples as u64);
1904 pipeline.set_alpha_to_coverage_enabled(multisampling.alpha_coverage);
1905 pipeline.set_alpha_to_one_enabled(multisampling.alpha_to_one);
1906 // TODO: sample_mask
1907 // TODO: sample_shading
1908 multisampling.rasterization_samples
1909 } else {
1910 1
1911 };
1912
1913 if let Some(name) = pipeline_desc.label {
1914 pipeline.set_label(name);
1915 }
1916
1917 profiling::scope!("Metal::new_render_pipeline_state");
1918
1919 #[cfg(feature = "pipeline-cache")]
1920 if let Some(binary_archive) = pipeline_cache::pipeline_cache_to_binary_archive(cache) {
1921 pipeline.set_binary_archives(&[&binary_archive.inner]);
1922 }
1923
1924 let (fs_lib, ps_sized_bindings) = match fs {
1925 Some(compiled) => (Some(compiled.library), compiled.sized_bindings),
1926 None => (None, Vec::new()),
1927 };
1928
1929 let pipeline_state = device
1930 // Replace this with `new_render_pipeline_state_with_fail_on_binary_archive_miss`
1931 // to debug that the cache is actually working.
1932 .new_render_pipeline_state(&pipeline)
1933 .map(|raw| n::GraphicsPipeline {
1934 vs_lib: vs.library,
1935 fs_lib,
1936 raw,
1937 primitive_type,
1938 vs_info: n::PipelineStageInfo {
1939 push_constants: pipeline_desc.layout.push_constants.vs,
1940 sizes_slot: pipeline_desc
1941 .layout
1942 .naga_options
1943 .per_stage_map
1944 .vs
1945 .sizes_buffer,
1946 sized_bindings: vs.sized_bindings,
1947 },
1948 ps_info: n::PipelineStageInfo {
1949 push_constants: pipeline_desc.layout.push_constants.ps,
1950 sizes_slot: pipeline_desc
1951 .layout
1952 .naga_options
1953 .per_stage_map
1954 .fs
1955 .sizes_buffer,
1956 sized_bindings: ps_sized_bindings,
1957 },
1958 rasterizer_state,
1959 depth_bias,
1960 depth_stencil_desc: pipeline_desc.depth_stencil.clone(),
1961 baked_states: pipeline_desc.baked_states.clone(),
1962 vertex_buffers,
1963 attachment_formats: subpass.attachments.map(|at| (at.format, at.channel)),
1964 samples,
1965 })
1966 .map_err(|err| {
1967 error!("PSO creation failed: {}", err);
1968 pso::CreationError::Other
1969 })?;
1970
        // We need to add the pipeline descriptor to the binary archive after creating the
1972 // pipeline, otherwise `new_render_pipeline_state_with_fail_on_binary_archive_miss`
1973 // succeeds when it shouldn't.
1974 #[cfg(feature = "pipeline-cache")]
1975 if let Some(binary_archive) = pipeline_cache::pipeline_cache_to_binary_archive(cache) {
1976 binary_archive
1977 .inner
1978 .add_render_pipeline_functions_with_descriptor(&pipeline)
1979 .unwrap();
1980 binary_archive.is_empty.store(false, Ordering::Relaxed);
1981 }
1982
1983 Ok(pipeline_state)
1984 }
1985
create_compute_pipeline<'a>( &self, pipeline_desc: &pso::ComputePipelineDesc<'a, Backend>, cache: Option<&n::PipelineCache>, ) -> Result<n::ComputePipeline, pso::CreationError>1986 unsafe fn create_compute_pipeline<'a>(
1987 &self,
1988 pipeline_desc: &pso::ComputePipelineDesc<'a, Backend>,
1989 cache: Option<&n::PipelineCache>,
1990 ) -> Result<n::ComputePipeline, pso::CreationError> {
1991 profiling::scope!("create_compute_pipeline");
1992 trace!("create_compute_pipeline {:?}", pipeline_desc);
1993 let pipeline = metal::ComputePipelineDescriptor::new();
1994
1995 let cs = self.load_shader(
1996 &pipeline_desc.shader,
1997 &pipeline_desc.layout,
1998 MTLPrimitiveTopologyClass::Unspecified,
1999 cache,
2000 naga::ShaderStage::Compute,
2001 )?;
2002 pipeline.set_compute_function(Some(&cs.function));
2003 if let Some(name) = pipeline_desc.label {
2004 pipeline.set_label(name);
2005 }
2006
2007 profiling::scope!("Metal::new_compute_pipeline_state");
2008
2009 #[cfg(feature = "pipeline-cache")]
2010 if let Some(binary_archive) = pipeline_cache::pipeline_cache_to_binary_archive(cache) {
2011 pipeline.set_binary_archives(&[&binary_archive.inner]);
2012 }
2013
2014 let pipeline_state = self
2015 .shared
2016 .device
2017 .lock()
2018 .new_compute_pipeline_state(&pipeline)
2019 .map(|raw| n::ComputePipeline {
2020 cs_lib: cs.library,
2021 raw,
2022 work_group_size: cs.wg_size,
2023 info: n::PipelineStageInfo {
2024 push_constants: pipeline_desc.layout.push_constants.cs,
2025 sizes_slot: pipeline_desc
2026 .layout
2027 .naga_options
2028 .per_stage_map
2029 .cs
2030 .sizes_buffer,
2031 sized_bindings: cs.sized_bindings,
2032 },
2033 })
2034 .map_err(|err| {
2035 error!("PSO creation failed: {}", err);
2036 pso::CreationError::Other
2037 })?;
2038
2039 // We need to add the pipline descriptor to the binary archive after creating the
2040 // pipeline, see `create_graphics_pipeline`.
2041 #[cfg(feature = "pipeline-cache")]
2042 if let Some(binary_archive) = pipeline_cache::pipeline_cache_to_binary_archive(cache) {
2043 binary_archive
2044 .inner
2045 .add_compute_pipeline_functions_with_descriptor(&pipeline)
2046 .unwrap();
2047 binary_archive.is_empty.store(false, Ordering::Relaxed)
2048 }
2049
2050 Ok(pipeline_state)
2051 }
2052
create_framebuffer<I>( &self, _render_pass: &n::RenderPass, _attachments: I, extent: image::Extent, ) -> Result<n::Framebuffer, d::OutOfMemory>2053 unsafe fn create_framebuffer<I>(
2054 &self,
2055 _render_pass: &n::RenderPass,
2056 _attachments: I,
2057 extent: image::Extent,
2058 ) -> Result<n::Framebuffer, d::OutOfMemory> {
2059 Ok(n::Framebuffer { extent })
2060 }
2061
create_shader_module( &self, raw_data: &[u32], ) -> Result<n::ShaderModule, d::ShaderError>2062 unsafe fn create_shader_module(
2063 &self,
2064 raw_data: &[u32],
2065 ) -> Result<n::ShaderModule, d::ShaderError> {
2066 profiling::scope!("create_shader_module");
2067 Ok(n::ShaderModule {
2068 #[cfg(feature = "cross")]
2069 spv: raw_data.to_vec(),
2070 #[cfg(feature = "pipeline-cache")]
2071 spv_hash: fxhash::hash64(raw_data),
2072 naga: if cfg!(feature = "cross") {
2073 Err("Cross is enabled".into())
2074 } else {
2075 let options = naga::front::spv::Options {
2076 adjust_coordinate_space: !self.features.contains(hal::Features::NDC_Y_UP),
2077 strict_capabilities: true,
2078 flow_graph_dump_prefix: None,
2079 };
2080 let parse_result = {
2081 profiling::scope!("naga::spv::parse");
2082 let parser = naga::front::spv::Parser::new(raw_data.iter().cloned(), &options);
2083 parser.parse()
2084 };
2085 match parse_result {
2086 Ok(module) => {
2087 debug!("Naga module {:#?}", module);
2088 match naga::valid::Validator::new(
2089 naga::valid::ValidationFlags::empty(),
2090 naga::valid::Capabilities::PUSH_CONSTANT,
2091 )
2092 .validate(&module)
2093 {
2094 Ok(info) => Ok(d::NagaShader { module, info }),
2095 Err(e) => Err(format!("Naga validation: {}", e)),
2096 }
2097 }
2098 Err(e) => Err(format!("Naga parsing: {:?}", e)),
2099 }
2100 },
2101 })
2102 }
2103
create_shader_module_from_naga( &self, shader: d::NagaShader, ) -> Result<n::ShaderModule, (d::ShaderError, d::NagaShader)>2104 unsafe fn create_shader_module_from_naga(
2105 &self,
2106 shader: d::NagaShader,
2107 ) -> Result<n::ShaderModule, (d::ShaderError, d::NagaShader)> {
2108 profiling::scope!("create_shader_module_from_naga");
2109
2110 #[cfg(any(feature = "pipeline-cache", feature = "cross"))]
2111 let spv = match naga::back::spv::write_vec(&shader.module, &shader.info, &self.spv_options)
2112 {
2113 Ok(spv) => spv,
2114 Err(e) => return Err((d::ShaderError::CompilationFailed(format!("{}", e)), shader)),
2115 };
2116
2117 Ok(n::ShaderModule {
2118 #[cfg(feature = "pipeline-cache")]
2119 spv_hash: fxhash::hash64(&spv),
2120 #[cfg(feature = "cross")]
2121 spv,
2122 naga: Ok(shader),
2123 })
2124 }
2125
create_sampler( &self, info: &image::SamplerDesc, ) -> Result<n::Sampler, d::AllocationError>2126 unsafe fn create_sampler(
2127 &self,
2128 info: &image::SamplerDesc,
2129 ) -> Result<n::Sampler, d::AllocationError> {
2130 Ok(n::Sampler {
2131 raw: match self.make_sampler_descriptor(info) {
2132 Some(ref descriptor) => Some(self.shared.device.lock().new_sampler(descriptor)),
2133 None => None,
2134 },
2135 data: conv::map_sampler_data_to_naga(info),
2136 #[cfg(feature = "cross")]
2137 cross_data: conv::map_sampler_data_to_cross(info),
2138 })
2139 }
2140
destroy_sampler(&self, _sampler: n::Sampler)2141 unsafe fn destroy_sampler(&self, _sampler: n::Sampler) {}
2142
map_memory( &self, memory: &mut n::Memory, segment: memory::Segment, ) -> Result<*mut u8, d::MapError>2143 unsafe fn map_memory(
2144 &self,
2145 memory: &mut n::Memory,
2146 segment: memory::Segment,
2147 ) -> Result<*mut u8, d::MapError> {
2148 let range = memory.resolve(&segment);
2149 debug!("map_memory of size {} at {:?}", memory.size, range);
2150
2151 let base_ptr = match memory.heap {
2152 n::MemoryHeap::Public(_, ref cpu_buffer) => cpu_buffer.contents() as *mut u8,
2153 n::MemoryHeap::Native(_) | n::MemoryHeap::Private => panic!("Unable to map memory!"),
2154 };
2155 Ok(base_ptr.offset(range.start as _))
2156 }
2157
unmap_memory(&self, memory: &mut n::Memory)2158 unsafe fn unmap_memory(&self, memory: &mut n::Memory) {
2159 debug!("unmap_memory of size {}", memory.size);
2160 }
2161
flush_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), d::OutOfMemory> where I: Iterator<Item = (&'a n::Memory, memory::Segment)>,2162 unsafe fn flush_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), d::OutOfMemory>
2163 where
2164 I: Iterator<Item = (&'a n::Memory, memory::Segment)>,
2165 {
2166 debug!("flush_mapped_memory_ranges");
2167 for (memory, ref segment) in iter {
2168 let range = memory.resolve(segment);
2169 debug!("\trange {:?}", range);
2170
2171 match memory.heap {
2172 n::MemoryHeap::Native(_) => unimplemented!(),
2173 n::MemoryHeap::Public(mt, ref cpu_buffer)
2174 if 1 << mt.0 != MemoryTypes::SHARED.bits() as usize =>
2175 {
2176 cpu_buffer.did_modify_range(NSRange {
2177 location: range.start as _,
2178 length: (range.end - range.start) as _,
2179 });
2180 }
2181 n::MemoryHeap::Public(..) => continue,
2182 n::MemoryHeap::Private => panic!("Can't map private memory!"),
2183 };
2184 }
2185
2186 Ok(())
2187 }
2188
invalidate_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), d::OutOfMemory> where I: Iterator<Item = (&'a n::Memory, memory::Segment)>,2189 unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), d::OutOfMemory>
2190 where
2191 I: Iterator<Item = (&'a n::Memory, memory::Segment)>,
2192 {
2193 let mut num_syncs = 0;
2194 debug!("invalidate_mapped_memory_ranges");
2195
2196 // temporary command buffer to copy the contents from
2197 // the given buffers into the allocated CPU-visible buffers
2198 // Note: using a separate internal queue in order to avoid a stall
2199 let cmd_buffer = self.invalidation_queue.spawn_temp();
2200 autoreleasepool(|| {
2201 let encoder = cmd_buffer.new_blit_command_encoder();
2202
2203 for (memory, ref segment) in iter {
2204 let range = memory.resolve(segment);
2205 debug!("\trange {:?}", range);
2206
2207 match memory.heap {
2208 n::MemoryHeap::Native(_) => unimplemented!(),
2209 n::MemoryHeap::Public(mt, ref cpu_buffer)
2210 if 1 << mt.0 != MemoryTypes::SHARED.bits() as usize =>
2211 {
2212 num_syncs += 1;
2213 encoder.synchronize_resource(cpu_buffer);
2214 }
2215 n::MemoryHeap::Public(..) => continue,
2216 n::MemoryHeap::Private => panic!("Can't map private memory!"),
2217 };
2218 }
2219 encoder.end_encoding();
2220 });
2221
2222 if num_syncs != 0 {
2223 debug!("\twaiting...");
2224 cmd_buffer.set_label("invalidate_mapped_memory_ranges");
2225 cmd_buffer.commit();
2226 cmd_buffer.wait_until_completed();
2227 }
2228
2229 Ok(())
2230 }
2231
create_semaphore(&self) -> Result<n::Semaphore, d::OutOfMemory>2232 fn create_semaphore(&self) -> Result<n::Semaphore, d::OutOfMemory> {
2233 Ok(n::Semaphore {
2234 // Semaphore synchronization between command buffers of the same queue
2235 // is useless, don't bother even creating one.
2236 system: if self.shared.private_caps.exposed_queues > 1 {
2237 Some(n::SystemSemaphore::new())
2238 } else {
2239 None
2240 },
2241 })
2242 }
2243
create_descriptor_pool<I>( &self, max_sets: usize, descriptor_ranges: I, _flags: pso::DescriptorPoolCreateFlags, ) -> Result<n::DescriptorPool, d::OutOfMemory> where I: Iterator<Item = pso::DescriptorRangeDesc>,2244 unsafe fn create_descriptor_pool<I>(
2245 &self,
2246 max_sets: usize,
2247 descriptor_ranges: I,
2248 _flags: pso::DescriptorPoolCreateFlags,
2249 ) -> Result<n::DescriptorPool, d::OutOfMemory>
2250 where
2251 I: Iterator<Item = pso::DescriptorRangeDesc>,
2252 {
2253 if self.shared.private_caps.argument_buffers {
2254 let mut arguments = n::ArgumentArray::default();
2255 for dr in descriptor_ranges {
2256 let content = n::DescriptorContent::from(dr.ty);
2257 let usage = n::ArgumentArray::describe_usage(dr.ty);
2258 if content.contains(n::DescriptorContent::BUFFER) {
2259 arguments.push(metal::MTLDataType::Pointer, dr.count, usage);
2260 }
2261 if content.contains(n::DescriptorContent::TEXTURE) {
2262 arguments.push(metal::MTLDataType::Texture, dr.count, usage);
2263 }
2264 if content.contains(n::DescriptorContent::SAMPLER) {
2265 arguments.push(metal::MTLDataType::Sampler, dr.count, usage);
2266 }
2267 }
2268
2269 let device = self.shared.device.lock();
2270 let (array_ref, total_resources) = arguments.build();
2271 let encoder = device.new_argument_encoder(array_ref);
2272
2273 let alignment = self.shared.private_caps.buffer_alignment;
2274 let total_size = encoder.encoded_length() + (max_sets as u64) * alignment;
2275 let raw = device.new_buffer(total_size, MTLResourceOptions::empty());
2276
2277 Ok(n::DescriptorPool::new_argument(
2278 raw,
2279 total_size,
2280 alignment,
2281 total_resources,
2282 ))
2283 } else {
2284 let mut counters = n::ResourceData::<n::PoolResourceIndex>::new();
2285 for dr in descriptor_ranges {
2286 counters.add_many(
2287 n::DescriptorContent::from(dr.ty),
2288 dr.count as pso::DescriptorBinding,
2289 );
2290 }
2291 Ok(n::DescriptorPool::new_emulated(counters))
2292 }
2293 }
2294
create_descriptor_set_layout<'a, I, J>( &self, binding_iter: I, immutable_samplers: J, ) -> Result<n::DescriptorSetLayout, d::OutOfMemory> where I: Iterator<Item = pso::DescriptorSetLayoutBinding>, J: Iterator<Item = &'a n::Sampler>,2295 unsafe fn create_descriptor_set_layout<'a, I, J>(
2296 &self,
2297 binding_iter: I,
2298 immutable_samplers: J,
2299 ) -> Result<n::DescriptorSetLayout, d::OutOfMemory>
2300 where
2301 I: Iterator<Item = pso::DescriptorSetLayoutBinding>,
2302 J: Iterator<Item = &'a n::Sampler>,
2303 {
2304 if self.shared.private_caps.argument_buffers {
2305 let mut stage_flags = pso::ShaderStageFlags::empty();
2306 let mut arguments = n::ArgumentArray::default();
2307 let mut bindings = FastHashMap::default();
2308 for desc in binding_iter {
2309 //TODO: have the API providing the dimensions and MSAA flag
2310 // for textures in an argument buffer
2311 match desc.ty {
2312 pso::DescriptorType::Buffer {
2313 format:
2314 pso::BufferDescriptorFormat::Structured {
2315 dynamic_offset: true,
2316 },
2317 ..
2318 } => {
2319 //TODO: apply the offsets somehow at the binding time
2320 error!("Dynamic offsets are not yet supported in argument buffers!");
2321 }
2322 pso::DescriptorType::Image {
2323 ty: pso::ImageDescriptorType::Storage { .. },
2324 }
2325 | pso::DescriptorType::Buffer {
2326 ty: pso::BufferDescriptorType::Storage { .. },
2327 format: pso::BufferDescriptorFormat::Texel,
2328 } => {
2329 //TODO: bind storage buffers and images separately
2330 error!("Storage images are not yet supported in argument buffers!");
2331 }
2332 _ => {}
2333 }
2334
2335 stage_flags |= desc.stage_flags;
2336 let content = n::DescriptorContent::from(desc.ty);
2337 let usage = n::ArgumentArray::describe_usage(desc.ty);
2338 let bind_target = naga::back::msl::BindTarget {
2339 buffer: if content.contains(n::DescriptorContent::BUFFER) {
2340 Some(
2341 arguments.push(metal::MTLDataType::Pointer, desc.count, usage)
2342 as naga::back::msl::Slot,
2343 )
2344 } else {
2345 None
2346 },
2347 texture: if content.contains(n::DescriptorContent::TEXTURE) {
2348 Some(
2349 arguments.push(metal::MTLDataType::Texture, desc.count, usage)
2350 as naga::back::msl::Slot,
2351 )
2352 } else {
2353 None
2354 },
2355 sampler: if content.contains(n::DescriptorContent::SAMPLER) {
2356 let slot = arguments.push(metal::MTLDataType::Sampler, desc.count, usage);
2357 Some(naga::back::msl::BindSamplerTarget::Resource(
2358 slot as naga::back::msl::Slot,
2359 ))
2360 } else {
2361 None
2362 },
2363 mutable: content.contains(n::DescriptorContent::WRITABLE),
2364 };
2365 let res_offset = bind_target
2366 .buffer
2367 .or(bind_target.texture)
2368 .or(bind_target.sampler.as_ref().and_then(|bst| match *bst {
2369 naga::back::msl::BindSamplerTarget::Resource(slot) => Some(slot),
2370 naga::back::msl::BindSamplerTarget::Inline(_) => None,
2371 }))
2372 .unwrap() as u32;
2373 bindings.insert(
2374 desc.binding,
2375 n::ArgumentLayout {
2376 bind_target,
2377 res_offset,
2378 count: desc.count,
2379 usage,
2380 content,
2381 },
2382 );
2383 }
2384
2385 let (array_ref, arg_total) = arguments.build();
2386 let encoder = self.shared.device.lock().new_argument_encoder(array_ref);
2387
2388 Ok(n::DescriptorSetLayout::ArgumentBuffer {
2389 encoder,
2390 stage_flags,
2391 bindings: Arc::new(bindings),
2392 total: arg_total as n::PoolResourceIndex,
2393 })
2394 } else {
2395 struct TempSampler {
2396 data: n::ImmutableSampler,
2397 binding: pso::DescriptorBinding,
2398 array_index: pso::DescriptorArrayIndex,
2399 }
2400 let mut immutable_sampler_iter = immutable_samplers;
2401 let mut tmp_samplers = Vec::new();
2402 let mut desc_layouts = Vec::new();
2403 let mut total = n::ResourceData::new();
2404
2405 for slb in binding_iter {
2406 let mut content = n::DescriptorContent::from(slb.ty);
2407 total.add_many(content, slb.count as _);
2408
2409 #[cfg_attr(not(feature = "cross"), allow(unused_variables))]
2410 if slb.immutable_samplers {
2411 tmp_samplers.extend(
2412 immutable_sampler_iter
2413 .by_ref()
2414 .take(slb.count)
2415 .enumerate()
2416 .map(|(array_index, sm)| TempSampler {
2417 data: n::ImmutableSampler {
2418 data: sm.data.clone(),
2419 #[cfg(feature = "cross")]
2420 cross_data: sm.cross_data.clone(),
2421 },
2422 binding: slb.binding,
2423 array_index,
2424 }),
2425 );
2426 content |= n::DescriptorContent::IMMUTABLE_SAMPLER;
2427 }
2428
2429 desc_layouts.extend((0..slb.count).map(|array_index| n::DescriptorLayout {
2430 content,
2431 stages: slb.stage_flags,
2432 binding: slb.binding,
2433 array_index,
2434 }));
2435 }
2436
2437 desc_layouts.sort_by_key(|dl| (dl.binding, dl.array_index));
2438 tmp_samplers.sort_by_key(|ts| (ts.binding, ts.array_index));
2439 // From here on, we assume that `desc_layouts` has at most a single item for
2440 // a (binding, array_index) pair. To achieve that, we deduplicate the array now
2441 desc_layouts.dedup_by(|a, b| {
2442 if (a.binding, a.array_index) == (b.binding, b.array_index) {
2443 debug_assert!(!b.stages.intersects(a.stages));
2444 debug_assert_eq!(a.content, b.content); //TODO: double check if this can be demanded
2445 b.stages |= a.stages; //`b` is here to stay
2446 true
2447 } else {
2448 false
2449 }
2450 });
2451
2452 Ok(n::DescriptorSetLayout::Emulated {
2453 layouts: Arc::new(desc_layouts),
2454 total,
2455 immutable_samplers: tmp_samplers
2456 .into_iter()
2457 .map(|ts| (ts.binding, ts.data))
2458 .collect(),
2459 })
2460 }
2461 }
2462
write_descriptor_set<'a, I>(&self, op: pso::DescriptorSetWrite<'a, Backend, I>) where I: Iterator<Item = pso::Descriptor<'a, Backend>>,2463 unsafe fn write_descriptor_set<'a, I>(&self, op: pso::DescriptorSetWrite<'a, Backend, I>)
2464 where
2465 I: Iterator<Item = pso::Descriptor<'a, Backend>>,
2466 {
2467 debug!("write_descriptor_set");
2468 match *op.set {
2469 n::DescriptorSet::Emulated {
2470 ref pool,
2471 ref layouts,
2472 ref resources,
2473 } => {
2474 let mut counters = resources.map(|r| r.start);
2475 let mut start = None; //TODO: can pre-compute this
2476 for (i, layout) in layouts.iter().enumerate() {
2477 if layout.binding == op.binding && layout.array_index == op.array_offset {
2478 start = Some(i);
2479 break;
2480 }
2481 counters.add(layout.content);
2482 }
2483 let mut data = pool.write();
2484
2485 for (layout, descriptor) in layouts[start.unwrap()..].iter().zip(op.descriptors) {
2486 trace!("\t{:?}", layout);
2487 match descriptor {
2488 pso::Descriptor::Sampler(sam) => {
2489 debug_assert!(!layout
2490 .content
2491 .contains(n::DescriptorContent::IMMUTABLE_SAMPLER));
2492 data.samplers[counters.samplers as usize] = (
2493 layout.stages,
2494 Some(AsNative::from(sam.raw.as_ref().unwrap().as_ref())),
2495 );
2496 }
2497 pso::Descriptor::Image(view, il) => {
2498 data.textures[counters.textures as usize] = (
2499 layout.stages,
2500 Some(AsNative::from(view.texture.as_ref())),
2501 il,
2502 );
2503 }
2504 pso::Descriptor::CombinedImageSampler(view, il, sam) => {
2505 if !layout
2506 .content
2507 .contains(n::DescriptorContent::IMMUTABLE_SAMPLER)
2508 {
2509 data.samplers[counters.samplers as usize] = (
2510 layout.stages,
2511 Some(AsNative::from(sam.raw.as_ref().unwrap().as_ref())),
2512 );
2513 }
2514 data.textures[counters.textures as usize] = (
2515 layout.stages,
2516 Some(AsNative::from(view.texture.as_ref())),
2517 il,
2518 );
2519 }
2520 pso::Descriptor::TexelBuffer(view) => {
2521 data.textures[counters.textures as usize] = (
2522 layout.stages,
2523 Some(AsNative::from(view.raw.as_ref())),
2524 image::Layout::General,
2525 );
2526 }
2527 pso::Descriptor::Buffer(buf, ref sub) => {
2528 let (raw, range) = buf.as_bound();
2529 debug_assert!(
2530 range.start + sub.offset + sub.size.unwrap_or(0) <= range.end
2531 );
2532 let raw_binding_size = match sub.size {
2533 Some(size) => size,
2534 None => range.end - range.start - sub.offset,
2535 };
2536 data.buffers[counters.buffers as usize] = (
2537 layout.stages,
2538 Some(AsNative::from(raw)),
2539 range.start + sub.offset,
2540 layout.binding,
2541 if layout.content.contains(n::DescriptorContent::SIZED_BUFFER) {
2542 raw_binding_size.min(u32::MAX as buffer::Offset - 1) as u32
2543 } else {
2544 !0
2545 },
2546 );
2547 }
2548 }
2549 counters.add(layout.content);
2550 }
2551 }
2552 n::DescriptorSet::ArgumentBuffer {
2553 ref raw,
2554 raw_offset,
2555 ref pool,
2556 ref range,
2557 ref encoder,
2558 ref bindings,
2559 ..
2560 } => {
2561 debug_assert!(self.shared.private_caps.argument_buffers);
2562
2563 encoder.set_argument_buffer(raw, raw_offset);
2564 let mut arg_index = {
2565 let binding = &bindings[&op.binding];
2566 debug_assert!((op.array_offset as usize) < binding.count);
2567 (binding.res_offset as NSUInteger) + (op.array_offset as NSUInteger)
2568 };
2569
2570 for (data, descriptor) in pool.write().resources
2571 [range.start as usize + arg_index as usize..range.end as usize]
2572 .iter_mut()
2573 .zip(op.descriptors)
2574 {
2575 match descriptor {
2576 pso::Descriptor::Sampler(sampler) => {
2577 debug_assert!(!bindings[&op.binding]
2578 .content
2579 .contains(n::DescriptorContent::IMMUTABLE_SAMPLER));
2580 encoder.set_sampler_state(arg_index, sampler.raw.as_ref().unwrap());
2581 arg_index += 1;
2582 }
2583 pso::Descriptor::Image(image, _layout) => {
2584 let tex_ref = image.texture.as_ref();
2585 encoder.set_texture(arg_index, tex_ref);
2586 data.ptr = (&**tex_ref).as_ptr();
2587 arg_index += 1;
2588 }
2589 pso::Descriptor::CombinedImageSampler(image, _il, sampler) => {
2590 let binding = &bindings[&op.binding];
2591 if !binding
2592 .content
2593 .contains(n::DescriptorContent::IMMUTABLE_SAMPLER)
2594 {
2595 //TODO: supporting arrays of combined image-samplers can be tricky.
2596 // We need to scan both sampler and image sections of the encoder
2597 // at the same time.
2598 assert!(
2599 arg_index
2600 < (binding.res_offset as NSUInteger)
2601 + (binding.count as NSUInteger)
2602 );
2603 encoder.set_sampler_state(
2604 arg_index + binding.count as NSUInteger,
2605 sampler.raw.as_ref().unwrap(),
2606 );
2607 }
2608 let tex_ref = image.texture.as_ref();
2609 encoder.set_texture(arg_index, tex_ref);
2610 data.ptr = (&**tex_ref).as_ptr();
2611 }
2612 pso::Descriptor::TexelBuffer(view) => {
2613 encoder.set_texture(arg_index, &view.raw);
2614 data.ptr = (&**view.raw).as_ptr();
2615 arg_index += 1;
2616 }
2617 pso::Descriptor::Buffer(buffer, ref sub) => {
2618 let (buf_raw, buf_range) = buffer.as_bound();
2619 encoder.set_buffer(arg_index, buf_raw, buf_range.start + sub.offset);
2620 data.ptr = (&**buf_raw).as_ptr();
2621 arg_index += 1;
2622 }
2623 }
2624 }
2625 }
2626 }
2627 }
2628
copy_descriptor_set<'a>(&self, _op: pso::DescriptorSetCopy<'a, Backend>)2629 unsafe fn copy_descriptor_set<'a>(&self, _op: pso::DescriptorSetCopy<'a, Backend>) {
2630 unimplemented!()
2631 }
2632
destroy_descriptor_pool(&self, _pool: n::DescriptorPool)2633 unsafe fn destroy_descriptor_pool(&self, _pool: n::DescriptorPool) {}
2634
destroy_descriptor_set_layout(&self, _layout: n::DescriptorSetLayout)2635 unsafe fn destroy_descriptor_set_layout(&self, _layout: n::DescriptorSetLayout) {}
2636
destroy_pipeline_layout(&self, _pipeline_layout: n::PipelineLayout)2637 unsafe fn destroy_pipeline_layout(&self, _pipeline_layout: n::PipelineLayout) {}
2638
destroy_shader_module(&self, _module: n::ShaderModule)2639 unsafe fn destroy_shader_module(&self, _module: n::ShaderModule) {}
2640
destroy_render_pass(&self, _pass: n::RenderPass)2641 unsafe fn destroy_render_pass(&self, _pass: n::RenderPass) {}
2642
destroy_graphics_pipeline(&self, _pipeline: n::GraphicsPipeline)2643 unsafe fn destroy_graphics_pipeline(&self, _pipeline: n::GraphicsPipeline) {}
2644
destroy_compute_pipeline(&self, _pipeline: n::ComputePipeline)2645 unsafe fn destroy_compute_pipeline(&self, _pipeline: n::ComputePipeline) {}
2646
destroy_framebuffer(&self, _framebuffer: n::Framebuffer)2647 unsafe fn destroy_framebuffer(&self, _framebuffer: n::Framebuffer) {}
2648
destroy_semaphore(&self, _semaphore: n::Semaphore)2649 unsafe fn destroy_semaphore(&self, _semaphore: n::Semaphore) {}
2650
allocate_memory( &self, memory_type: hal::MemoryTypeId, size: u64, ) -> Result<n::Memory, d::AllocationError>2651 unsafe fn allocate_memory(
2652 &self,
2653 memory_type: hal::MemoryTypeId,
2654 size: u64,
2655 ) -> Result<n::Memory, d::AllocationError> {
2656 profiling::scope!("allocate_memory");
2657 let (storage, cache) = MemoryTypes::describe(memory_type.0);
2658 let device = self.shared.device.lock();
2659 debug!("allocate_memory type {:?} of size {}", memory_type, size);
2660
2661 // Heaps cannot be used for CPU coherent resources
2662 //TEMP: MacOS supports Private only, iOS and tvOS can do private/shared
2663 let heap = if self.shared.private_caps.resource_heaps
2664 && storage != MTLStorageMode::Shared
2665 && false
2666 {
2667 let descriptor = metal::HeapDescriptor::new();
2668 descriptor.set_storage_mode(storage);
2669 descriptor.set_cpu_cache_mode(cache);
2670 descriptor.set_size(size);
2671 let heap_raw = device.new_heap(&descriptor);
2672 n::MemoryHeap::Native(heap_raw)
2673 } else if storage == MTLStorageMode::Private {
2674 n::MemoryHeap::Private
2675 } else {
2676 let options = conv::resource_options_from_storage_and_cache(storage, cache);
2677 let cpu_buffer = device.new_buffer(size, options);
2678 debug!("\tbacked by cpu buffer {:?}", cpu_buffer.as_ptr());
2679 n::MemoryHeap::Public(memory_type, cpu_buffer)
2680 };
2681
2682 Ok(n::Memory::new(heap, size))
2683 }
2684
free_memory(&self, memory: n::Memory)2685 unsafe fn free_memory(&self, memory: n::Memory) {
2686 profiling::scope!("free_memory");
2687 debug!("free_memory of size {}", memory.size);
2688 if let n::MemoryHeap::Public(_, ref cpu_buffer) = memory.heap {
2689 debug!("\tbacked by cpu buffer {:?}", cpu_buffer.as_ptr());
2690 }
2691 }
2692
create_buffer( &self, size: u64, usage: buffer::Usage, _sparse: memory::SparseFlags, ) -> Result<n::Buffer, buffer::CreationError>2693 unsafe fn create_buffer(
2694 &self,
2695 size: u64,
2696 usage: buffer::Usage,
2697 _sparse: memory::SparseFlags,
2698 ) -> Result<n::Buffer, buffer::CreationError> {
2699 debug!("create_buffer of size {} and usage {:?}", size, usage);
2700 Ok(n::Buffer::Unbound {
2701 usage,
2702 size,
2703 name: String::new(),
2704 })
2705 }
2706
get_buffer_requirements(&self, buffer: &n::Buffer) -> memory::Requirements2707 unsafe fn get_buffer_requirements(&self, buffer: &n::Buffer) -> memory::Requirements {
2708 let (size, usage) = match *buffer {
2709 n::Buffer::Unbound { size, usage, .. } => (size, usage),
2710 n::Buffer::Bound { .. } => panic!("Unexpected Buffer::Bound"),
2711 };
2712 let mut max_size = size;
2713 let mut max_alignment = self.shared.private_caps.buffer_alignment;
2714
2715 if self.shared.private_caps.resource_heaps {
2716 // We don't know what memory type the user will try to allocate the buffer with, so we test them
2717 // all get the most stringent ones.
2718 for (i, _mt) in self.memory_types.iter().enumerate() {
2719 let (storage, cache) = MemoryTypes::describe(i);
2720 let options = conv::resource_options_from_storage_and_cache(storage, cache);
2721 let requirements = self
2722 .shared
2723 .device
2724 .lock()
2725 .heap_buffer_size_and_align(size, options);
2726 max_size = cmp::max(max_size, requirements.size);
2727 max_alignment = cmp::max(max_alignment, requirements.align);
2728 }
2729 }
2730
2731 // based on Metal validation error for view creation:
2732 // failed assertion `BytesPerRow of a buffer-backed texture with pixelFormat(XXX) must be aligned to 256 bytes
2733 const SIZE_MASK: u64 = 0xFF;
2734 let supports_texel_view =
2735 usage.intersects(buffer::Usage::UNIFORM_TEXEL | buffer::Usage::STORAGE_TEXEL);
2736
2737 memory::Requirements {
2738 size: (max_size + SIZE_MASK) & !SIZE_MASK,
2739 alignment: max_alignment,
2740 type_mask: if !supports_texel_view || self.shared.private_caps.shared_textures {
2741 MemoryTypes::all().bits()
2742 } else {
2743 (MemoryTypes::all() ^ MemoryTypes::SHARED).bits()
2744 },
2745 }
2746 }
2747
bind_buffer_memory( &self, memory: &n::Memory, offset: u64, buffer: &mut n::Buffer, ) -> Result<(), d::BindError>2748 unsafe fn bind_buffer_memory(
2749 &self,
2750 memory: &n::Memory,
2751 offset: u64,
2752 buffer: &mut n::Buffer,
2753 ) -> Result<(), d::BindError> {
2754 profiling::scope!("bind_buffer_memory");
2755 let (size, name) = match buffer {
2756 n::Buffer::Unbound { size, name, .. } => (*size, name),
2757 n::Buffer::Bound { .. } => panic!("Unexpected Buffer::Bound"),
2758 };
2759 debug!("bind_buffer_memory of size {} at offset {}", size, offset);
2760 *buffer = match memory.heap {
2761 n::MemoryHeap::Native(ref heap) => {
2762 let options = conv::resource_options_from_storage_and_cache(
2763 heap.storage_mode(),
2764 heap.cpu_cache_mode(),
2765 );
2766 let raw = heap.new_buffer(size, options).unwrap_or_else(|| {
2767 // TODO: disable hazard tracking?
2768 self.shared.device.lock().new_buffer(size, options)
2769 });
2770 raw.set_label(name);
2771 n::Buffer::Bound {
2772 raw,
2773 options,
2774 range: 0..size, //TODO?
2775 }
2776 }
2777 n::MemoryHeap::Public(mt, ref cpu_buffer) => {
2778 debug!(
2779 "\tmapped to public heap with address {:?}",
2780 cpu_buffer.as_ptr()
2781 );
2782 let (storage, cache) = MemoryTypes::describe(mt.0);
2783 let options = conv::resource_options_from_storage_and_cache(storage, cache);
2784 if offset == 0x0 && size == cpu_buffer.length() {
2785 cpu_buffer.set_label(name);
2786 } else if self.shared.private_caps.supports_debug_markers {
2787 cpu_buffer.add_debug_marker(
2788 name,
2789 NSRange {
2790 location: offset,
2791 length: size,
2792 },
2793 );
2794 }
2795 n::Buffer::Bound {
2796 raw: cpu_buffer.clone(),
2797 options,
2798 range: offset..offset + size,
2799 }
2800 }
2801 n::MemoryHeap::Private => {
2802 //TODO: check for aliasing
2803 let options = MTLResourceOptions::StorageModePrivate
2804 | MTLResourceOptions::CPUCacheModeDefaultCache;
2805 let raw = self.shared.device.lock().new_buffer(size, options);
2806 raw.set_label(name);
2807 n::Buffer::Bound {
2808 raw,
2809 options,
2810 range: 0..size,
2811 }
2812 }
2813 };
2814
2815 Ok(())
2816 }
2817
destroy_buffer(&self, buffer: n::Buffer)2818 unsafe fn destroy_buffer(&self, buffer: n::Buffer) {
2819 if let n::Buffer::Bound { raw, range, .. } = buffer {
2820 debug!(
2821 "destroy_buffer {:?} occupying memory {:?}",
2822 raw.as_ptr(),
2823 range
2824 );
2825 }
2826 }
2827
    /// Create a texel-buffer view by wrapping the buffer's bytes in a linear
    /// 2D `MTLTexture` (texels are laid out in rows of at most
    /// `max_texture_size` columns; shaders index it as 2D via SPIRV-Cross).
    unsafe fn create_buffer_view(
        &self,
        buffer: &n::Buffer,
        format_maybe: Option<format::Format>,
        sub: buffer::SubRange,
    ) -> Result<n::BufferView, buffer::ViewCreationError> {
        // The buffer must already be bound to memory.
        let (raw, base_range, options) = match *buffer {
            n::Buffer::Bound {
                ref raw,
                ref range,
                options,
            } => (raw, range, options),
            n::Buffer::Unbound { .. } => panic!("Unexpected Buffer::Unbound"),
        };
        let start = base_range.start + sub.offset;
        // A `size` of None means "to the end of the bound range".
        let size_rough = sub.size.unwrap_or(base_range.end - start);
        // A concrete format is required to type the texels.
        let format = match format_maybe {
            Some(fmt) => fmt,
            None => {
                return Err(buffer::ViewCreationError::UnsupportedFormat(format_maybe));
            }
        };
        let format_desc = format.surface_desc();
        if format_desc.aspects != format::Aspects::COLOR || format_desc.is_compressed() {
            // Validator says "Linear texture: cannot create compressed, depth, or stencil textures"
            return Err(buffer::ViewCreationError::UnsupportedFormat(format_maybe));
        }

        //Note: we rely on SPIRV-Cross to use the proper 2D texel indexing here
        let texel_count = size_rough * 8 / format_desc.bits as u64;
        // Split the texels into rows capped at the device's maximum texture width.
        let col_count = cmp::min(texel_count, self.shared.private_caps.max_texture_size);
        let row_count = (texel_count + self.shared.private_caps.max_texture_size - 1)
            / self.shared.private_caps.max_texture_size;
        let mtl_format = self
            .shared
            .private_caps
            .map_format(format)
            .ok_or(buffer::ViewCreationError::UnsupportedFormat(format_maybe))?;

        let descriptor = metal::TextureDescriptor::new();
        descriptor.set_texture_type(MTLTextureType::D2);
        descriptor.set_width(col_count);
        descriptor.set_height(row_count);
        descriptor.set_mipmap_level_count(1);
        descriptor.set_pixel_format(mtl_format);
        descriptor.set_resource_options(options);
        descriptor.set_storage_mode(raw.storage_mode());
        descriptor.set_usage(metal::MTLTextureUsage::ShaderRead);

        // Row stride must satisfy the device's buffer alignment requirement.
        let align_mask = self.shared.private_caps.buffer_alignment - 1;
        let stride = (col_count * (format_desc.bits as u64 / 8) + align_mask) & !align_mask;

        Ok(n::BufferView {
            raw: raw.new_texture_with_descriptor(&descriptor, start, stride),
        })
    }
2884
destroy_buffer_view(&self, _view: n::BufferView)2885 unsafe fn destroy_buffer_view(&self, _view: n::BufferView) {
2886 //nothing to do
2887 }
2888
    /// Create an unbound image: translate `kind`/`format`/`usage`/`view_caps`
    /// into a `MTLTextureDescriptor` and precompute per-mip byte sizes.
    /// The actual Metal resource is created later, in `bind_image_memory`.
    unsafe fn create_image(
        &self,
        kind: image::Kind,
        mip_levels: image::Level,
        format: format::Format,
        tiling: image::Tiling,
        usage: image::Usage,
        _sparse: memory::SparseFlags,
        view_caps: image::ViewCapabilities,
    ) -> Result<n::Image, image::CreationError> {
        profiling::scope!("create_image");
        debug!(
            "create_image {:?} with {} mips of {:?} {:?} and usage {:?} with {:?}",
            kind, mip_levels, format, tiling, usage, view_caps
        );

        let is_cube = view_caps.contains(image::ViewCapabilities::KIND_CUBE);
        let mtl_format = self
            .shared
            .private_caps
            .map_format(format)
            .ok_or_else(|| image::CreationError::Format(format))?;

        let descriptor = metal::TextureDescriptor::new();

        // Map the gfx-hal kind to a Metal texture type. `num_layers` is
        // Some(_) only for array types (and holds faces/6 for cube arrays).
        let (mtl_type, num_layers) = match kind {
            image::Kind::D1(_, 1) => {
                assert!(!is_cube);
                (MTLTextureType::D1, None)
            }
            image::Kind::D1(_, layers) => {
                assert!(!is_cube);
                (MTLTextureType::D1Array, Some(layers))
            }
            image::Kind::D2(_, _, layers, 1) => {
                if is_cube && layers > 6 {
                    assert_eq!(layers % 6, 0);
                    (MTLTextureType::CubeArray, Some(layers / 6))
                } else if is_cube {
                    assert_eq!(layers, 6);
                    (MTLTextureType::Cube, None)
                } else if layers > 1 {
                    (MTLTextureType::D2Array, Some(layers))
                } else {
                    (MTLTextureType::D2, None)
                }
            }
            image::Kind::D2(_, _, 1, samples) if !is_cube => {
                descriptor.set_sample_count(samples as u64);
                (MTLTextureType::D2Multisample, None)
            }
            image::Kind::D2(..) => {
                error!(
                    "Multi-sampled array textures or cubes are not supported: {:?}",
                    kind
                );
                return Err(image::CreationError::Kind);
            }
            image::Kind::D3(..) => {
                assert!(!is_cube);
                if view_caps.contains(image::ViewCapabilities::KIND_2D_ARRAY) {
                    warn!("Unable to support 2D array views of 3D textures");
                }
                (MTLTextureType::D3, None)
            }
        };

        descriptor.set_texture_type(mtl_type);
        if let Some(count) = num_layers {
            descriptor.set_array_length(count as u64);
        }
        let extent = kind.extent();
        descriptor.set_width(extent.width as u64);
        descriptor.set_height(extent.height as u64);
        descriptor.set_depth(extent.depth as u64);
        descriptor.set_mipmap_level_count(mip_levels as u64);
        descriptor.set_pixel_format(mtl_format);
        descriptor.set_usage(conv::map_texture_usage(usage, tiling, view_caps));

        // Byte size of each mip level across all layers, used later for
        // buffer-backed (host-visible) images and size requirements.
        let base = format.base_format();
        let format_desc = base.0.desc();
        let mip_sizes = (0..mip_levels)
            .map(|level| {
                let pitches = n::Image::pitches_impl(extent.at_level(level), format_desc);
                num_layers.unwrap_or(1) as buffer::Offset * pitches[3]
            })
            .collect();

        // Only simple single-mip, non-array, linear color 2D transfer images
        // can be backed by a CPU-visible buffer.
        let host_usage = image::Usage::TRANSFER_SRC | image::Usage::TRANSFER_DST;
        let host_visible = mtl_type == MTLTextureType::D2
            && mip_levels == 1
            && num_layers.is_none()
            && format_desc.aspects.contains(format::Aspects::COLOR)
            && tiling == image::Tiling::Linear
            && host_usage.contains(usage);

        Ok(n::Image {
            like: n::ImageLike::Unbound {
                descriptor,
                mip_sizes,
                host_visible,
                name: String::new(),
            },
            kind,
            mip_levels,
            format_desc,
            shader_channel: base.1.into(),
            mtl_format,
            mtl_type,
        })
    }
3000
    /// Query size/alignment/memory-type requirements for an unbound image.
    ///
    /// With resource heaps, the worst case across all candidate memory types
    /// is reported, since the user may bind with any of them.
    unsafe fn get_image_requirements(&self, image: &n::Image) -> memory::Requirements {
        let (descriptor, mip_sizes, host_visible) = match image.like {
            n::ImageLike::Unbound {
                ref descriptor,
                ref mip_sizes,
                host_visible,
                ..
            } => (descriptor, mip_sizes, host_visible),
            n::ImageLike::Texture(..) | n::ImageLike::Buffer(..) => {
                panic!("Expected Image::Unbound")
            }
        };

        if self.shared.private_caps.resource_heaps {
            // We don't know what memory type the user will try to allocate the image with, so we test them
            // all get the most stringent ones. Note we don't check Shared because heaps can't use it
            let mut max_size = 0;
            let mut max_alignment = 0;
            let types = if host_visible {
                MemoryTypes::all()
            } else {
                MemoryTypes::PRIVATE
            };
            for (i, _) in self.memory_types.iter().enumerate() {
                if !types.contains(MemoryTypes::from_bits(1 << i).unwrap()) {
                    continue;
                }
                // Probe the heap requirements with this type's storage/cache
                // modes set on the descriptor.
                let (storage, cache_mode) = MemoryTypes::describe(i);
                descriptor.set_storage_mode(storage);
                descriptor.set_cpu_cache_mode(cache_mode);

                let requirements = self
                    .shared
                    .device
                    .lock()
                    .heap_texture_size_and_align(descriptor);
                max_size = cmp::max(max_size, requirements.size);
                max_alignment = cmp::max(max_alignment, requirements.align);
            }
            memory::Requirements {
                size: max_size,
                alignment: max_alignment,
                type_mask: types.bits(),
            }
        } else if host_visible {
            // Buffer-backed image: a single mip, rounded up to buffer alignment.
            assert_eq!(mip_sizes.len(), 1);
            let mask = self.shared.private_caps.buffer_alignment - 1;
            memory::Requirements {
                size: (mip_sizes[0] + mask) & !mask,
                alignment: self.shared.private_caps.buffer_alignment,
                type_mask: MemoryTypes::all().bits(),
            }
        } else {
            memory::Requirements {
                size: mip_sizes.iter().sum(),
                alignment: 4,
                type_mask: MemoryTypes::PRIVATE.bits(),
            }
        }
    }
3061
get_image_subresource_footprint( &self, image: &n::Image, sub: image::Subresource, ) -> image::SubresourceFootprint3062 unsafe fn get_image_subresource_footprint(
3063 &self,
3064 image: &n::Image,
3065 sub: image::Subresource,
3066 ) -> image::SubresourceFootprint {
3067 let num_layers = image.kind.num_layers() as buffer::Offset;
3068 let level_offset = (0..sub.level).fold(0, |offset, level| {
3069 let pitches = image.pitches(level);
3070 offset + num_layers * pitches[3]
3071 });
3072 let pitches = image.pitches(sub.level);
3073 let layer_offset = level_offset + sub.layer as buffer::Offset * pitches[3];
3074 image::SubresourceFootprint {
3075 slice: layer_offset..layer_offset + pitches[3],
3076 row_pitch: pitches[1] as _,
3077 depth_pitch: pitches[2] as _,
3078 array_pitch: pitches[3] as _,
3079 }
3080 }
3081
bind_image_memory( &self, memory: &n::Memory, offset: u64, image: &mut n::Image, ) -> Result<(), d::BindError>3082 unsafe fn bind_image_memory(
3083 &self,
3084 memory: &n::Memory,
3085 offset: u64,
3086 image: &mut n::Image,
3087 ) -> Result<(), d::BindError> {
3088 profiling::scope!("bind_image_memory");
3089 let like = {
3090 let (descriptor, mip_sizes, name) = match image.like {
3091 n::ImageLike::Unbound {
3092 ref descriptor,
3093 ref mip_sizes,
3094 ref name,
3095 ..
3096 } => (descriptor, mip_sizes, name),
3097 n::ImageLike::Texture(..) | n::ImageLike::Buffer(..) => {
3098 panic!("Expected Image::Unbound")
3099 }
3100 };
3101
3102 match memory.heap {
3103 n::MemoryHeap::Native(ref heap) => {
3104 let resource_options = conv::resource_options_from_storage_and_cache(
3105 heap.storage_mode(),
3106 heap.cpu_cache_mode(),
3107 );
3108 descriptor.set_resource_options(resource_options);
3109 n::ImageLike::Texture(heap.new_texture(descriptor).unwrap_or_else(|| {
3110 // TODO: disable hazard tracking?
3111 let texture = self.shared.device.lock().new_texture(&descriptor);
3112 texture.set_label(name);
3113 texture
3114 }))
3115 }
3116 n::MemoryHeap::Public(_memory_type, ref cpu_buffer) => {
3117 assert_eq!(mip_sizes.len(), 1);
3118 if offset == 0x0 && cpu_buffer.length() == mip_sizes[0] {
3119 cpu_buffer.set_label(name);
3120 } else if self.shared.private_caps.supports_debug_markers {
3121 cpu_buffer.add_debug_marker(
3122 name,
3123 NSRange {
3124 location: offset,
3125 length: mip_sizes[0],
3126 },
3127 );
3128 }
3129 n::ImageLike::Buffer(n::Buffer::Bound {
3130 raw: cpu_buffer.clone(),
3131 range: offset..offset + mip_sizes[0] as u64,
3132 options: MTLResourceOptions::StorageModeShared,
3133 })
3134 }
3135 n::MemoryHeap::Private => {
3136 descriptor.set_storage_mode(MTLStorageMode::Private);
3137 let texture = self.shared.device.lock().new_texture(descriptor);
3138 texture.set_label(name);
3139 n::ImageLike::Texture(texture)
3140 }
3141 }
3142 };
3143
3144 Ok(image.like = like)
3145 }
3146
destroy_image(&self, _image: n::Image)3147 unsafe fn destroy_image(&self, _image: n::Image) {
3148 //nothing to do
3149 }
3150
    /// Create a view of an image, reusing the underlying texture when the
    /// view is a trivial full-range alias, or creating a Metal texture view
    /// over the requested mip/layer slice otherwise.
    unsafe fn create_image_view(
        &self,
        image: &n::Image,
        kind: image::ViewKind,
        format: format::Format,
        swizzle: format::Swizzle,
        _usage: image::Usage,
        range: image::SubresourceRange,
    ) -> Result<n::ImageView, image::ViewCreationError> {
        profiling::scope!("create_image_view");

        let mtl_format = match self
            .shared
            .private_caps
            .map_format_with_swizzle(format, swizzle)
        {
            Some(f) => f,
            None => {
                error!("failed to swizzle format {:?} with {:?}", format, swizzle);
                return Err(image::ViewCreationError::BadFormat(format));
            }
        };
        let raw = image.like.as_texture();
        // The "identity" range used to detect full-image views below.
        let full_range = image::SubresourceRange {
            aspects: image.format_desc.aspects,
            ..Default::default()
        };
        // MSAA textures can only be viewed as D2Multisample; keep the image's
        // own type in that case regardless of the requested view kind.
        let mtl_type = if image.mtl_type == MTLTextureType::D2Multisample {
            if kind != image::ViewKind::D2 {
                error!("Requested {:?} for MSAA texture", kind);
            }
            image.mtl_type
        } else {
            conv::map_texture_type(kind)
        };

        let texture = if mtl_format == image.mtl_format
            && mtl_type == image.mtl_type
            && swizzle == format::Swizzle::NO
            && range == full_range
        {
            // Some images are marked as framebuffer-only, and we can't create aliases of them.
            // Also helps working around Metal bugs with aliased array textures.
            raw.to_owned()
        } else {
            raw.new_texture_view_from_slice(
                mtl_format,
                mtl_type,
                NSRange {
                    location: range.level_start as _,
                    length: range.resolve_level_count(image.mip_levels) as _,
                },
                NSRange {
                    location: range.layer_start as _,
                    length: range.resolve_layer_count(image.kind.num_layers()) as _,
                },
            )
        };

        Ok(n::ImageView {
            texture,
            mtl_format,
        })
    }
3215
destroy_image_view(&self, _view: n::ImageView)3216 unsafe fn destroy_image_view(&self, _view: n::ImageView) {}
3217
create_fence(&self, signaled: bool) -> Result<n::Fence, d::OutOfMemory>3218 fn create_fence(&self, signaled: bool) -> Result<n::Fence, d::OutOfMemory> {
3219 debug!("Creating fence with signal={}", signaled);
3220 Ok(n::Fence::Idle { signaled })
3221 }
3222
reset_fence(&self, fence: &mut n::Fence) -> Result<(), d::OutOfMemory>3223 unsafe fn reset_fence(&self, fence: &mut n::Fence) -> Result<(), d::OutOfMemory> {
3224 debug!("Resetting fence ptr {:?}", fence);
3225 *fence = n::Fence::Idle { signaled: false };
3226 Ok(())
3227 }
3228
wait_for_fence( &self, fence: &n::Fence, timeout_ns: u64, ) -> Result<bool, d::WaitError>3229 unsafe fn wait_for_fence(
3230 &self,
3231 fence: &n::Fence,
3232 timeout_ns: u64,
3233 ) -> Result<bool, d::WaitError> {
3234 unsafe fn to_ns(duration: time::Duration) -> u64 {
3235 duration.as_secs() * 1_000_000_000 + duration.subsec_nanos() as u64
3236 }
3237
3238 debug!("wait_for_fence {:?} for {} ms", fence, timeout_ns);
3239 match *fence {
3240 n::Fence::Idle { signaled } => {
3241 if !signaled {
3242 warn!("Fence ptr {:?} is not pending, waiting not possible", fence);
3243 }
3244 Ok(signaled)
3245 }
3246 n::Fence::PendingSubmission(ref cmd_buf) => {
3247 if timeout_ns == !0 {
3248 cmd_buf.wait_until_completed();
3249 return Ok(true);
3250 }
3251 let start = time::Instant::now();
3252 loop {
3253 if let metal::MTLCommandBufferStatus::Completed = cmd_buf.status() {
3254 return Ok(true);
3255 }
3256 if to_ns(start.elapsed()) >= timeout_ns {
3257 return Ok(false);
3258 }
3259 thread::sleep(time::Duration::from_millis(1));
3260 self.shared.queue_blocker.lock().triage();
3261 }
3262 }
3263 }
3264 }
3265
get_fence_status(&self, fence: &n::Fence) -> Result<bool, d::DeviceLost>3266 unsafe fn get_fence_status(&self, fence: &n::Fence) -> Result<bool, d::DeviceLost> {
3267 Ok(match *fence {
3268 n::Fence::Idle { signaled } => signaled,
3269 n::Fence::PendingSubmission(ref cmd_buf) => match cmd_buf.status() {
3270 metal::MTLCommandBufferStatus::Completed => true,
3271 _ => false,
3272 },
3273 })
3274 }
3275
destroy_fence(&self, _fence: n::Fence)3276 unsafe fn destroy_fence(&self, _fence: n::Fence) {
3277 //empty
3278 }
3279
create_event(&self) -> Result<n::Event, d::OutOfMemory>3280 fn create_event(&self) -> Result<n::Event, d::OutOfMemory> {
3281 Ok(n::Event(Arc::new(AtomicBool::new(false))))
3282 }
3283
get_event_status(&self, event: &n::Event) -> Result<bool, d::WaitError>3284 unsafe fn get_event_status(&self, event: &n::Event) -> Result<bool, d::WaitError> {
3285 Ok(event.0.load(Ordering::Acquire))
3286 }
3287
    /// Signal the event, then re-examine blocked queue submissions since
    /// some of them may have been waiting on this event.
    unsafe fn set_event(&self, event: &mut n::Event) -> Result<(), d::OutOfMemory> {
        event.0.store(true, Ordering::Release);
        self.shared.queue_blocker.lock().triage();
        Ok(())
    }
3293
reset_event(&self, event: &mut n::Event) -> Result<(), d::OutOfMemory>3294 unsafe fn reset_event(&self, event: &mut n::Event) -> Result<(), d::OutOfMemory> {
3295 Ok(event.0.store(false, Ordering::Release))
3296 }
3297
destroy_event(&self, _event: n::Event)3298 unsafe fn destroy_event(&self, _event: n::Event) {
3299 //empty
3300 }
3301
create_query_pool( &self, ty: query::Type, count: query::Id, ) -> Result<n::QueryPool, query::CreationError>3302 unsafe fn create_query_pool(
3303 &self,
3304 ty: query::Type,
3305 count: query::Id,
3306 ) -> Result<n::QueryPool, query::CreationError> {
3307 match ty {
3308 query::Type::Occlusion => {
3309 let range = self
3310 .shared
3311 .visibility
3312 .allocator
3313 .lock()
3314 .allocate_range(count)
3315 .map_err(|_| {
3316 error!("Not enough space to allocate an occlusion query pool");
3317 d::OutOfMemory::Host
3318 })?;
3319 Ok(n::QueryPool::Occlusion(range))
3320 }
3321 query::Type::Timestamp => {
3322 warn!("Timestamp queries are not really useful yet");
3323 Ok(n::QueryPool::Timestamp)
3324 }
3325 query::Type::PipelineStatistics(..) => Err(query::CreationError::Unsupported(ty)),
3326 }
3327 }
3328
destroy_query_pool(&self, pool: n::QueryPool)3329 unsafe fn destroy_query_pool(&self, pool: n::QueryPool) {
3330 match pool {
3331 n::QueryPool::Occlusion(range) => {
3332 self.shared.visibility.allocator.lock().free_range(range);
3333 }
3334 n::QueryPool::Timestamp => {}
3335 }
3336 }
3337
    /// Copy query results for `queries` into `data`, one entry per `stride`
    /// bytes. With `WAIT`, blocks until the results are available; otherwise
    /// returns whether they were ready. Timestamp pools always yield zeros.
    unsafe fn get_query_pool_results(
        &self,
        pool: &n::QueryPool,
        queries: Range<query::Id>,
        data: &mut [u8],
        stride: buffer::Stride,
        flags: query::ResultFlags,
    ) -> Result<bool, d::WaitError> {
        let is_ready = match *pool {
            n::QueryPool::Occlusion(ref pool_range) => {
                let visibility = &self.shared.visibility;
                let is_ready = if flags.contains(query::ResultFlags::WAIT) {
                    // Block on the condvar until all requested queries resolve.
                    let mut guard = visibility.allocator.lock();
                    while !visibility.are_available(pool_range.start, &queries) {
                        visibility.condvar.wait(&mut guard);
                    }
                    true
                } else {
                    visibility.are_available(pool_range.start, &queries)
                };

                // The visibility buffer stores one u64 counter per query slot.
                let size_data = mem::size_of::<u64>() as buffer::Offset;
                if stride as u64 == size_data
                    && flags.contains(query::ResultFlags::BITS_64)
                    && !flags.contains(query::ResultFlags::WITH_AVAILABILITY)
                {
                    // if stride is matching, copy everything in one go
                    ptr::copy_nonoverlapping(
                        (visibility.buffer.contents() as *const u8).offset(
                            (pool_range.start + queries.start) as isize * size_data as isize,
                        ),
                        data.as_mut_ptr(),
                        stride as usize * (queries.end - queries.start) as usize,
                    );
                } else {
                    // copy parts of individual entries
                    for i in 0..queries.end - queries.start {
                        let absolute_index = (pool_range.start + queries.start + i) as isize;
                        // Counter value for this query slot.
                        let value =
                            *(visibility.buffer.contents() as *const u64).offset(absolute_index);
                        // Availability flags live as u32s past `availability_offset`.
                        let base = (visibility.buffer.contents() as *const u8)
                            .offset(visibility.availability_offset as isize);
                        let availability = *(base as *const u32).offset(absolute_index);
                        let data_ptr = data[i as usize * stride as usize..].as_mut_ptr();
                        if flags.contains(query::ResultFlags::BITS_64) {
                            *(data_ptr as *mut u64) = value;
                            if flags.contains(query::ResultFlags::WITH_AVAILABILITY) {
                                *(data_ptr as *mut u64).offset(1) = availability as u64;
                            }
                        } else {
                            *(data_ptr as *mut u32) = value as u32;
                            if flags.contains(query::ResultFlags::WITH_AVAILABILITY) {
                                *(data_ptr as *mut u32).offset(1) = availability;
                            }
                        }
                    }
                }

                is_ready
            }
            n::QueryPool::Timestamp => {
                // Timestamps are not implemented; report zeroed results.
                for d in data.iter_mut() {
                    *d = 0;
                }
                true
            }
        };

        Ok(is_ready)
    }
3408
    /// Block the calling thread until all work submitted to the shared queue
    /// has completed.
    fn wait_idle(&self) -> Result<(), d::OutOfMemory> {
        command::QueueInner::wait_idle(&self.shared.queue);
        Ok(())
    }
3413
set_image_name(&self, image: &mut n::Image, name: &str)3414 unsafe fn set_image_name(&self, image: &mut n::Image, name: &str) {
3415 match image {
3416 n::Image {
3417 like: n::ImageLike::Buffer(ref mut buf),
3418 ..
3419 } => self.set_buffer_name(buf, name),
3420 n::Image {
3421 like: n::ImageLike::Texture(ref tex),
3422 ..
3423 } => tex.set_label(name),
3424 n::Image {
3425 like:
3426 n::ImageLike::Unbound {
3427 name: ref mut unbound_name,
3428 ..
3429 },
3430 ..
3431 } => {
3432 *unbound_name = name.to_string();
3433 }
3434 };
3435 }
3436
set_buffer_name(&self, buffer: &mut n::Buffer, name: &str)3437 unsafe fn set_buffer_name(&self, buffer: &mut n::Buffer, name: &str) {
3438 match buffer {
3439 n::Buffer::Unbound {
3440 name: ref mut unbound_name,
3441 ..
3442 } => {
3443 *unbound_name = name.to_string();
3444 }
3445 n::Buffer::Bound {
3446 ref raw, ref range, ..
3447 } => {
3448 if self.shared.private_caps.supports_debug_markers {
3449 raw.add_debug_marker(
3450 name,
3451 NSRange {
3452 location: range.start,
3453 length: range.end - range.start,
3454 },
3455 );
3456 }
3457 }
3458 }
3459 }
3460
set_command_buffer_name( &self, command_buffer: &mut command::CommandBuffer, name: &str, )3461 unsafe fn set_command_buffer_name(
3462 &self,
3463 command_buffer: &mut command::CommandBuffer,
3464 name: &str,
3465 ) {
3466 command_buffer.name = name.to_string();
3467 }
3468
set_semaphore_name(&self, _semaphore: &mut n::Semaphore, _name: &str)3469 unsafe fn set_semaphore_name(&self, _semaphore: &mut n::Semaphore, _name: &str) {}
3470
set_fence_name(&self, _fence: &mut n::Fence, _name: &str)3471 unsafe fn set_fence_name(&self, _fence: &mut n::Fence, _name: &str) {}
3472
set_framebuffer_name(&self, _framebuffer: &mut n::Framebuffer, _name: &str)3473 unsafe fn set_framebuffer_name(&self, _framebuffer: &mut n::Framebuffer, _name: &str) {}
3474
set_render_pass_name(&self, render_pass: &mut n::RenderPass, name: &str)3475 unsafe fn set_render_pass_name(&self, render_pass: &mut n::RenderPass, name: &str) {
3476 render_pass.name = name.to_string();
3477 }
3478
    /// Not implemented yet: descriptor sets are not labeled.
    unsafe fn set_descriptor_set_name(&self, _descriptor_set: &mut n::DescriptorSet, _name: &str) {
        // TODO
    }
3482
    /// Not implemented yet: descriptor set layouts are not labeled.
    unsafe fn set_descriptor_set_layout_name(
        &self,
        _descriptor_set_layout: &mut n::DescriptorSetLayout,
        _name: &str,
    ) {
        // TODO
    }
3490
    /// Not implemented yet: pipeline layouts are not labeled.
    unsafe fn set_pipeline_layout_name(
        &self,
        _pipeline_layout: &mut n::PipelineLayout,
        _name: &str,
    ) {
        // TODO
    }
3498
    /// Start a programmatic Metal frame capture: create a capture scope for
    /// this device, install it as the default, start capturing into it, and
    /// open the scope. The order of these calls is significant.
    fn start_capture(&self) {
        let device = self.shared.device.lock();
        let shared_capture_manager = CaptureManager::shared();
        let default_capture_scope = shared_capture_manager.new_capture_scope_with_device(&device);
        shared_capture_manager.set_default_capture_scope(&default_capture_scope);
        shared_capture_manager.start_capture_with_scope(&default_capture_scope);
        default_capture_scope.begin_scope();
    }
3507
stop_capture(&self)3508 fn stop_capture(&self) {
3509 let shared_capture_manager = CaptureManager::shared();
3510 if let Some(default_capture_scope) = shared_capture_manager.default_capture_scope() {
3511 default_capture_scope.end_scope();
3512 }
3513 shared_capture_manager.stop_capture();
3514 }
3515 }
3516
#[test]
fn test_send_sync() {
    // Compile-time check that Device is safe to share across threads.
    fn assert_send_sync<T: Send + Sync>() {}
    assert_send_sync::<Device>()
}
3522