/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#[cfg(feature = "trace")]
use crate::device::trace::Action;
use crate::{
    command::{
        texture_copy_view_to_hal, validate_linear_texture_data, validate_texture_copy_range,
        CommandAllocator, CommandBuffer, CopySide, ImageCopyTexture, TransferError, BITS_PER_BYTE,
    },
    conv,
    device::{alloc, DeviceError, WaitIdleError},
    hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token},
    id,
    memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction},
    resource::{Buffer, BufferAccessError, BufferMapState, BufferUse, TextureUse},
    FastHashMap, FastHashSet,
};

use hal::{command::CommandBuffer as _, device::Device as _, queue::Queue as _};
use smallvec::SmallVec;
use std::{iter, ops::Range, ptr};
use thiserror::Error;

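/// Transient staging resources for a queue write: a host-visible staging
/// buffer, its backing memory block, and the command buffer that records the
/// copy into the destination resource.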
struct StagingData<B: hal::Backend> {
    buffer: B::Buffer,
    memory: alloc::MemoryBlock<B>,
    cmdbuf: B::CommandBuffer,
}

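/// A raw resource kept alive until the pending writes referencing it have been
/// submitted, after which it is destroyed and its memory block freed.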
#[derive(Debug)]
pub enum TempResource<B: hal::Backend> {
    Buffer(B::Buffer),
    Image(B::Image),
}

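/// Queue writes (`write_buffer`/`write_texture`) that have been recorded but not
/// yet submitted. `command_buffer` holds the recorded copies, `temp_resources`
/// keeps the staging resources alive, and `dst_buffers`/`dst_textures` remember
/// which destinations these writes touch.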
#[derive(Debug)]
pub(crate) struct PendingWrites<B: hal::Backend> {
    pub command_buffer: Option<B::CommandBuffer>,
    pub temp_resources: Vec<(TempResource<B>, alloc::MemoryBlock<B>)>,
    pub dst_buffers: FastHashSet<id::BufferId>,
    pub dst_textures: FastHashSet<id::TextureId>,
}

impl<B: hal::Backend> PendingWrites<B> {
    pub fn new() -> Self {
        Self {
            command_buffer: None,
            temp_resources: Vec::new(),
            dst_buffers: FastHashSet::default(),
            dst_textures: FastHashSet::default(),
        }
    }

    pub fn dispose(
        self,
        device: &B::Device,
        cmd_allocator: &CommandAllocator<B>,
        mem_allocator: &mut alloc::MemoryAllocator<B>,
    ) {
        if let Some(raw) = self.command_buffer {
            cmd_allocator.discard_internal(raw);
        }
        for (resource, memory) in self.temp_resources {
            mem_allocator.free(device, memory);
            match resource {
                TempResource::Buffer(buffer) => unsafe {
                    device.destroy_buffer(buffer);
                },
                TempResource::Image(image) => unsafe {
                    device.destroy_image(image);
                },
            }
        }
    }

    pub fn consume_temp(&mut self, resource: TempResource<B>, memory: alloc::MemoryBlock<B>) {
        self.temp_resources.push((resource, memory));
    }

    fn consume(&mut self, stage: StagingData<B>) {
        self.temp_resources
            .push((TempResource::Buffer(stage.buffer), stage.memory));
        self.command_buffer = Some(stage.cmdbuf);
    }

    #[must_use]
    fn finish(&mut self) -> Option<B::CommandBuffer> {
        self.dst_buffers.clear();
        self.dst_textures.clear();
        self.command_buffer.take().map(|mut cmd_buf| unsafe {
            cmd_buf.finish();
            cmd_buf
        })
    }

    fn borrow_cmd_buf(&mut self, cmd_allocator: &CommandAllocator<B>) -> &mut B::CommandBuffer {
        if self.command_buffer.is_none() {
            let mut cmdbuf = cmd_allocator.allocate_internal();
            unsafe {
                cmdbuf.begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT);
            }
            self.command_buffer = Some(cmdbuf);
        }
        self.command_buffer.as_mut().unwrap()
    }
}

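/// Buffer ranges, gathered across the submitted command buffers, that still
/// need to be zero-initialized before the submission executes.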
#[derive(Default)]
struct RequiredBufferInits {
    map: FastHashMap<id::BufferId, Vec<Range<wgt::BufferAddress>>>,
}

impl RequiredBufferInits {
    fn add<B: hal::Backend>(
        &mut self,
        buffer_memory_init_actions: &[MemoryInitTrackerAction<id::BufferId>],
        buffer_guard: &mut Storage<Buffer<B>, id::BufferId>,
    ) -> Result<(), QueueSubmitError> {
        for buffer_use in buffer_memory_init_actions.iter() {
            let buffer = buffer_guard
                .get_mut(buffer_use.id)
                .map_err(|_| QueueSubmitError::DestroyedBuffer(buffer_use.id))?;

            let uninitialized_ranges = buffer.initialization_status.drain(buffer_use.range.clone());
            match buffer_use.kind {
                MemoryInitKind::ImplicitlyInitialized => {
                    uninitialized_ranges.for_each(drop);
                }
                MemoryInitKind::NeedsInitializedMemory => {
                    self.map
                        .entry(buffer_use.id)
                        .or_default()
                        .extend(uninitialized_ranges);
                }
            }
        }
        Ok(())
    }
}

impl<B: hal::Backend> super::Device<B> {
    pub fn borrow_pending_writes(&mut self) -> &mut B::CommandBuffer {
        self.pending_writes.borrow_cmd_buf(&self.cmd_allocator)
    }

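    /// Creates a host-visible staging buffer of `size` bytes and pairs it with
    /// a command buffer (reusing the pending one if available) so the caller
    /// can record the upload copy.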
    fn prepare_stage(&mut self, size: wgt::BufferAddress) -> Result<StagingData<B>, DeviceError> {
        profiling::scope!("prepare_stage");
        let mut buffer = unsafe {
            self.raw
                .create_buffer(
                    size,
                    hal::buffer::Usage::TRANSFER_SRC,
                    hal::memory::SparseFlags::empty(),
                )
                .map_err(|err| match err {
                    hal::buffer::CreationError::OutOfMemory(_) => DeviceError::OutOfMemory,
                    _ => panic!("failed to create staging buffer: {}", err),
                })?
        };
        //TODO: do we need to transition into HOST_WRITE access first?
        let requirements = unsafe {
            self.raw.set_buffer_name(&mut buffer, "<write_buffer_temp>");
            self.raw.get_buffer_requirements(&buffer)
        };

        let block = self.mem_allocator.lock().allocate(
            &self.raw,
            requirements,
            gpu_alloc::UsageFlags::UPLOAD | gpu_alloc::UsageFlags::TRANSIENT,
        )?;
        block.bind_buffer(&self.raw, &mut buffer)?;

        let cmdbuf = match self.pending_writes.command_buffer.take() {
            Some(cmdbuf) => cmdbuf,
            None => {
                let mut cmdbuf = self.cmd_allocator.allocate_internal();
                unsafe {
                    cmdbuf.begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT);
                }
                cmdbuf
            }
        };
        Ok(StagingData {
            buffer,
            memory: block,
            cmdbuf,
        })
    }

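    /// Records `fill_buffer` commands that zero out every range collected in
    /// `required_buffer_inits`, merging adjacent ranges first and inserting the
    /// necessary barriers into the pending-writes command buffer.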
    fn initialize_buffer_memory(
        &mut self,
        mut required_buffer_inits: RequiredBufferInits,
        buffer_guard: &mut Storage<Buffer<B>, id::BufferId>,
    ) -> Result<(), QueueSubmitError> {
        self.pending_writes
            .dst_buffers
            .extend(required_buffer_inits.map.keys());

        let cmd_buf = self.pending_writes.borrow_cmd_buf(&self.cmd_allocator);
        let mut trackers = self.trackers.lock();

        for (buffer_id, mut ranges) in required_buffer_inits.map.drain() {
            // Collapse touching ranges. We can't do this any earlier since we only now gathered
            // ranges from several different command buffers!
            ranges.sort_by(|a, b| a.start.cmp(&b.start));
            for i in (1..ranges.len()).rev() {
                assert!(ranges[i - 1].end <= ranges[i].start); // The memory init tracker made sure of this!
                if ranges[i].start == ranges[i - 1].end {
                    ranges[i - 1].end = ranges[i].end;
                    ranges.swap_remove(i); // Ordering not important at this point
                }
            }

            // Don't do use_replace since the buffer may already no longer have a ref_count.
            // However, we *know* that it is currently in use, so the tracker must already know about it.
            let transition = trackers.buffers.change_replace_tracked(
                id::Valid(buffer_id),
                (),
                BufferUse::COPY_DST,
            );
            let buffer = buffer_guard.get(buffer_id).unwrap();
            let &(ref buffer_raw, _) = buffer
                .raw
                .as_ref()
                .ok_or(QueueSubmitError::DestroyedBuffer(buffer_id))?;
            unsafe {
                cmd_buf.pipeline_barrier(
                    super::all_buffer_stages()..hal::pso::PipelineStage::TRANSFER,
                    hal::memory::Dependencies::empty(),
                    transition.map(|pending| pending.into_hal(buffer)),
                );
            }
            for range in ranges {
                let size = range.end - range.start;

                assert!(
                    range.start % 4 == 0,
                    "Buffer {:?} has an uninitialized range with a start not aligned to 4 (start was {})",
                    buffer,
                    range.start
                );
                assert!(
                    size % 4 == 0,
                    "Buffer {:?} has an uninitialized range with a size not aligned to 4 (size was {})",
                    buffer,
                    size
                );

                unsafe {
                    cmd_buf.fill_buffer(
                        buffer_raw,
                        hal::buffer::SubRange {
                            offset: range.start,
                            size: Some(size),
                        },
                        0,
                    );
                }
            }
        }

        Ok(())
    }
}

#[derive(Clone, Debug, Error)]
#[error("queue is invalid")]
pub struct InvalidQueue;

#[derive(Clone, Debug, Error)]
pub enum QueueWriteError {
    #[error(transparent)]
    Queue(#[from] DeviceError),
    #[error(transparent)]
    Transfer(#[from] TransferError),
}

#[derive(Clone, Debug, Error)]
pub enum QueueSubmitError {
    #[error(transparent)]
    Queue(#[from] DeviceError),
    #[error("buffer {0:?} is destroyed")]
    DestroyedBuffer(id::BufferId),
    #[error("texture {0:?} is destroyed")]
    DestroyedTexture(id::TextureId),
    #[error(transparent)]
    Unmap(#[from] BufferAccessError),
    #[error("swap chain output was dropped before the command buffer got submitted")]
    SwapChainOutputDropped,
    #[error("GPU got stuck :(")]
    StuckGpu,
}

//TODO: move out common parts of write_xxx.

impl<G: GlobalIdentityHandlerFactory> Global<G> {
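    /// Schedules a write of `data` into `buffer_id` at `buffer_offset`.
    ///
    /// The data is copied into a temporary staging buffer immediately; the copy
    /// into the destination is recorded into the device's pending writes, which
    /// are submitted ahead of the next `queue_submit`.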
    pub fn queue_write_buffer<B: GfxBackend>(
        &self,
        queue_id: id::QueueId,
        buffer_id: id::BufferId,
        buffer_offset: wgt::BufferAddress,
        data: &[u8],
    ) -> Result<(), QueueWriteError> {
        profiling::scope!("write_buffer", "Queue");

        let hub = B::hub(self);
        let mut token = Token::root();
        let (mut device_guard, mut token) = hub.devices.write(&mut token);
        let device = device_guard
            .get_mut(queue_id)
            .map_err(|_| DeviceError::Invalid)?;
        let (buffer_guard, _) = hub.buffers.read(&mut token);

        #[cfg(feature = "trace")]
        if let Some(ref trace) = device.trace {
            let mut trace = trace.lock();
            let data_path = trace.make_binary("bin", data);
            trace.add(Action::WriteBuffer {
                id: buffer_id,
                data: data_path,
                range: buffer_offset..buffer_offset + data.len() as wgt::BufferAddress,
                queued: true,
            });
        }

        let data_size = data.len() as wgt::BufferAddress;
        if data_size == 0 {
            log::trace!("Ignoring write_buffer of size 0");
            return Ok(());
        }

        let mut stage = device.prepare_stage(data_size)?;
        stage.memory.write_bytes(&device.raw, 0, data)?;

        let mut trackers = device.trackers.lock();
        let (dst, transition) = trackers
            .buffers
            .use_replace(&*buffer_guard, buffer_id, (), BufferUse::COPY_DST)
            .map_err(TransferError::InvalidBuffer)?;
        let &(ref dst_raw, _) = dst
            .raw
            .as_ref()
            .ok_or(TransferError::InvalidBuffer(buffer_id))?;
        if !dst.usage.contains(wgt::BufferUsage::COPY_DST) {
            return Err(TransferError::MissingCopyDstUsageFlag(Some(buffer_id), None).into());
        }
        dst.life_guard.use_at(device.active_submission_index + 1);

        if data_size % wgt::COPY_BUFFER_ALIGNMENT != 0 {
            return Err(TransferError::UnalignedCopySize(data_size).into());
        }
        if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 {
            return Err(TransferError::UnalignedBufferOffset(buffer_offset).into());
        }
        if buffer_offset + data_size > dst.size {
            return Err(TransferError::BufferOverrun {
                start_offset: buffer_offset,
                end_offset: buffer_offset + data_size,
                buffer_size: dst.size,
                side: CopySide::Destination,
            }
            .into());
        }

        let region = hal::command::BufferCopy {
            src: 0,
            dst: buffer_offset,
            size: data.len() as _,
        };
        unsafe {
            stage.cmdbuf.pipeline_barrier(
                super::all_buffer_stages()..hal::pso::PipelineStage::TRANSFER,
                hal::memory::Dependencies::empty(),
                iter::once(hal::memory::Barrier::Buffer {
                    states: hal::buffer::Access::HOST_WRITE..hal::buffer::Access::TRANSFER_READ,
                    target: &stage.buffer,
                    range: hal::buffer::SubRange::WHOLE,
                    families: None,
                })
                .chain(transition.map(|pending| pending.into_hal(dst))),
            );
            stage
                .cmdbuf
                .copy_buffer(&stage.buffer, dst_raw, iter::once(region));
        }

        device.pending_writes.consume(stage);
        device.pending_writes.dst_buffers.insert(buffer_id);

        // Ensure the overwritten bytes are marked as initialized so they don't need to be
        // nulled prior to mapping or binding.
        {
            drop(buffer_guard);
            let (mut buffer_guard, _) = hub.buffers.write(&mut token);

            let dst = buffer_guard.get_mut(buffer_id).unwrap();
            dst.initialization_status
                .clear(buffer_offset..(buffer_offset + data_size));
        }

        Ok(())
    }

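    /// Schedules a write of `data`, laid out according to `data_layout`, into the
    /// region of `destination` described by `size`.
    ///
    /// Rows are copied into a staging buffer (re-padded to the device's optimal
    /// row pitch when necessary), and the buffer-to-image copy is recorded into
    /// the pending writes executed before the next `queue_submit`.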
    pub fn queue_write_texture<B: GfxBackend>(
        &self,
        queue_id: id::QueueId,
        destination: &ImageCopyTexture,
        data: &[u8],
        data_layout: &wgt::ImageDataLayout,
        size: &wgt::Extent3d,
    ) -> Result<(), QueueWriteError> {
        profiling::scope!("write_texture", "Queue");

        let hub = B::hub(self);
        let mut token = Token::root();
        let (mut device_guard, mut token) = hub.devices.write(&mut token);
        let device = device_guard
            .get_mut(queue_id)
            .map_err(|_| DeviceError::Invalid)?;
        let (texture_guard, _) = hub.textures.read(&mut token);
        let (image_layers, image_range, image_offset) =
            texture_copy_view_to_hal(destination, size, &*texture_guard)?;

        #[cfg(feature = "trace")]
        if let Some(ref trace) = device.trace {
            let mut trace = trace.lock();
            let data_path = trace.make_binary("bin", data);
            trace.add(Action::WriteTexture {
                to: destination.clone(),
                data: data_path,
                layout: *data_layout,
                size: *size,
            });
        }

        if size.width == 0 || size.height == 0 || size.depth_or_array_layers == 0 {
            log::trace!("Ignoring write_texture of size 0");
            return Ok(());
        }

        let texture_format = texture_guard.get(destination.texture).unwrap().format;
        let bytes_per_block = conv::map_texture_format(texture_format, device.private_features)
            .surface_desc()
            .bits as u32
            / BITS_PER_BYTE;
        validate_linear_texture_data(
            data_layout,
            texture_format,
            data.len() as wgt::BufferAddress,
            CopySide::Source,
            bytes_per_block as wgt::BufferAddress,
            size,
            false,
        )?;

        let (block_width, block_height) = texture_format.describe().block_dimensions;
        let block_width = block_width as u32;
        let block_height = block_height as u32;

        if !conv::is_valid_copy_dst_texture_format(texture_format) {
            return Err(TransferError::CopyToForbiddenTextureFormat(texture_format).into());
        }
        let width_blocks = size.width / block_width;
        let height_blocks = size.height / block_height;

        let texel_rows_per_image = if let Some(rows_per_image) = data_layout.rows_per_image {
            rows_per_image.get()
        } else {
            // This value only matters when copying more than one layer, and in that case
            // `rows_per_image` is validated to be `Some`.
            size.height
        };
        let block_rows_per_image = texel_rows_per_image / block_height;

        let bytes_per_row_alignment = get_lowest_common_denom(
            device.hal_limits.optimal_buffer_copy_pitch_alignment as u32,
            bytes_per_block,
        );
        let stage_bytes_per_row = align_to(bytes_per_block * width_blocks, bytes_per_row_alignment);

        let block_rows_in_copy =
            (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks;
        let stage_size = stage_bytes_per_row as u64 * block_rows_in_copy as u64;
        let mut stage = device.prepare_stage(stage_size)?;

        let mut trackers = device.trackers.lock();
        let (dst, transition) = trackers
            .textures
            .use_replace(
                &*texture_guard,
                destination.texture,
                image_range,
                TextureUse::COPY_DST,
            )
            .unwrap();
        let &(ref dst_raw, _) = dst
            .raw
            .as_ref()
            .ok_or(TransferError::InvalidTexture(destination.texture))?;

        if !dst.usage.contains(wgt::TextureUsage::COPY_DST) {
            return Err(
                TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(),
            );
        }
        validate_texture_copy_range(
            destination,
            dst.format,
            dst.kind,
            CopySide::Destination,
            size,
        )?;
        dst.life_guard.use_at(device.active_submission_index + 1);

        let bytes_per_row = if let Some(bytes_per_row) = data_layout.bytes_per_row {
            bytes_per_row.get()
        } else {
            width_blocks * bytes_per_block
        };

        let ptr = stage.memory.map(&device.raw, 0, stage_size)?;
        unsafe {
            profiling::scope!("copy");
            //TODO: https://github.com/zakarumych/gpu-alloc/issues/13
            if stage_bytes_per_row == bytes_per_row {
                // Fast path if the data is already aligned optimally.
                ptr::copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), stage_size as usize);
            } else {
                // Copy row by row into the optimal alignment.
                let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
                for layer in 0..size.depth_or_array_layers {
                    let rows_offset = layer * block_rows_per_image;
                    for row in 0..height_blocks {
                        ptr::copy_nonoverlapping(
                            data.as_ptr()
                                .offset((rows_offset + row) as isize * bytes_per_row as isize),
                            ptr.as_ptr().offset(
                                (rows_offset + row) as isize * stage_bytes_per_row as isize,
                            ),
                            copy_bytes_per_row,
                        );
                    }
                }
            }
        }
        stage.memory.unmap(&device.raw);
        if !stage.memory.is_coherent() {
            stage.memory.flush_range(&device.raw, 0, None)?;
        }

        // WebGPU uses the physical size of the texture for copies whereas vulkan uses
        // the virtual size. We have passed validation, so it's safe to use the
        // image extent data directly. We want the provided copy size to be no larger than
        // the virtual size.
        let max_image_extent = dst.kind.level_extent(destination.mip_level as _);
        let image_extent = wgt::Extent3d {
            width: size.width.min(max_image_extent.width),
            height: size.height.min(max_image_extent.height),
            depth_or_array_layers: size.depth_or_array_layers,
        };

        let region = hal::command::BufferImageCopy {
            buffer_offset: 0,
            buffer_width: (stage_bytes_per_row / bytes_per_block) * block_width,
            buffer_height: texel_rows_per_image,
            image_layers,
            image_offset,
            image_extent: conv::map_extent(&image_extent, dst.dimension),
        };
        unsafe {
            stage.cmdbuf.pipeline_barrier(
                super::all_image_stages() | hal::pso::PipelineStage::HOST
                    ..hal::pso::PipelineStage::TRANSFER,
                hal::memory::Dependencies::empty(),
                iter::once(hal::memory::Barrier::Buffer {
                    states: hal::buffer::Access::HOST_WRITE..hal::buffer::Access::TRANSFER_READ,
                    target: &stage.buffer,
                    range: hal::buffer::SubRange::WHOLE,
                    families: None,
                })
                .chain(transition.map(|pending| pending.into_hal(dst))),
            );
            stage.cmdbuf.copy_buffer_to_image(
                &stage.buffer,
                dst_raw,
                hal::image::Layout::TransferDstOptimal,
                iter::once(region),
            );
        }

        device.pending_writes.consume(stage);
        device
            .pending_writes
            .dst_textures
            .insert(destination.texture);

        Ok(())
    }

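    /// Submits the given command buffers to the queue.
    ///
    /// Pending queue writes are submitted first, buffer ranges that still require
    /// zero-initialization are filled, resource state transitions are stitched in,
    /// and submission indices and resource lifetimes are updated before the raw
    /// submit is issued.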
    pub fn queue_submit<B: GfxBackend>(
        &self,
        queue_id: id::QueueId,
        command_buffer_ids: &[id::CommandBufferId],
    ) -> Result<(), QueueSubmitError> {
        profiling::scope!("submit", "Queue");

        let hub = B::hub(self);
        let mut token = Token::root();

        let callbacks = {
            let (mut device_guard, mut token) = hub.devices.write(&mut token);
            let device = device_guard
                .get_mut(queue_id)
                .map_err(|_| DeviceError::Invalid)?;
            let pending_write_command_buffer = device.pending_writes.finish();
            device.temp_suspected.clear();
            device.active_submission_index += 1;
            let submit_index = device.active_submission_index;

            let fence = {
                let mut signal_swapchain_semaphores = SmallVec::<[_; 1]>::new();
                let (mut swap_chain_guard, mut token) = hub.swap_chains.write(&mut token);
                let (mut command_buffer_guard, mut token) = hub.command_buffers.write(&mut token);

                if !command_buffer_ids.is_empty() {
                    profiling::scope!("prepare");

                    let (render_bundle_guard, mut token) = hub.render_bundles.read(&mut token);
                    let (_, mut token) = hub.pipeline_layouts.read(&mut token);
                    let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token);
                    let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token);
                    let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token);
                    let (mut buffer_guard, mut token) = hub.buffers.write(&mut token);
                    let (texture_guard, mut token) = hub.textures.write(&mut token);
                    let (texture_view_guard, mut token) = hub.texture_views.read(&mut token);
                    let (sampler_guard, _) = hub.samplers.read(&mut token);

                    let mut required_buffer_inits = RequiredBufferInits::default();
                    //Note: locking the trackers has to be done after the storages
                    let mut trackers = device.trackers.lock();

                    //TODO: if multiple command buffers are submitted, we can re-use the last
                    // native command buffer of the previous chain instead of always creating
                    // a temporary one, since the chains are not finished.

                    // finish all the command buffers first
                    for &cmb_id in command_buffer_ids {
                        let cmdbuf = match command_buffer_guard.get_mut(cmb_id) {
                            Ok(cmdbuf) => cmdbuf,
                            Err(_) => continue,
                        };
                        #[cfg(feature = "trace")]
                        if let Some(ref trace) = device.trace {
                            trace.lock().add(Action::Submit(
                                submit_index,
                                cmdbuf.commands.take().unwrap(),
                            ));
                        }
                        if !cmdbuf.is_finished() {
                            continue;
                        }

                        required_buffer_inits
                            .add(&cmdbuf.buffer_memory_init_actions, &mut *buffer_guard)?;
                        // optimize the tracked states
                        cmdbuf.trackers.optimize();

                        for sc_id in cmdbuf.used_swap_chains.drain(..) {
                            let sc = &mut swap_chain_guard[sc_id.value];
                            if sc.acquired_view_id.is_none() {
                                return Err(QueueSubmitError::SwapChainOutputDropped);
                            }
                            if sc.active_submission_index != submit_index {
                                sc.active_submission_index = submit_index;
                                // Only add a signal if this is the first time for this swapchain
                                // to be used in the submission.
                                signal_swapchain_semaphores.push(sc_id.value);
                            }
                        }

                        // update submission IDs
                        for id in cmdbuf.trackers.buffers.used() {
                            let buffer = &mut buffer_guard[id];
                            if buffer.raw.is_none() {
                                return Err(QueueSubmitError::DestroyedBuffer(id.0));
                            }
                            if !buffer.life_guard.use_at(submit_index) {
                                if let BufferMapState::Active { .. } = buffer.map_state {
                                    log::warn!("Dropped buffer has a pending mapping.");
                                    super::unmap_buffer(&device.raw, buffer)?;
                                }
                                device.temp_suspected.buffers.push(id);
                            } else {
                                match buffer.map_state {
                                    BufferMapState::Idle => (),
                                    _ => panic!("Buffer {:?} is still mapped", id),
                                }
                            }
                        }
                        for id in cmdbuf.trackers.textures.used() {
                            let texture = &texture_guard[id];
                            if texture.raw.is_none() {
                                return Err(QueueSubmitError::DestroyedTexture(id.0));
                            }
                            if !texture.life_guard.use_at(submit_index) {
                                device.temp_suspected.textures.push(id);
                            }
                        }
                        for id in cmdbuf.trackers.views.used() {
                            if !texture_view_guard[id].life_guard.use_at(submit_index) {
                                device.temp_suspected.texture_views.push(id);
                            }
                        }
                        for id in cmdbuf.trackers.bind_groups.used() {
                            if !bind_group_guard[id].life_guard.use_at(submit_index) {
                                device.temp_suspected.bind_groups.push(id);
                            }
                        }
                        for id in cmdbuf.trackers.samplers.used() {
                            if !sampler_guard[id].life_guard.use_at(submit_index) {
                                device.temp_suspected.samplers.push(id);
                            }
                        }
                        for id in cmdbuf.trackers.compute_pipes.used() {
                            if !compute_pipe_guard[id].life_guard.use_at(submit_index) {
                                device.temp_suspected.compute_pipelines.push(id);
                            }
                        }
                        for id in cmdbuf.trackers.render_pipes.used() {
                            if !render_pipe_guard[id].life_guard.use_at(submit_index) {
                                device.temp_suspected.render_pipelines.push(id);
                            }
                        }
                        for id in cmdbuf.trackers.bundles.used() {
                            if !render_bundle_guard[id].life_guard.use_at(submit_index) {
                                device.temp_suspected.render_bundles.push(id);
                            }
                        }

                        // execute resource transitions
                        let mut transit = device.cmd_allocator.extend(cmdbuf);
                        unsafe {
                            // the last buffer was open, closing now
                            cmdbuf.raw.last_mut().unwrap().finish();
                            transit
                                .begin_primary(hal::command::CommandBufferFlags::ONE_TIME_SUBMIT);
                        }
                        log::trace!("Stitching command buffer {:?} before submission", cmb_id);
                        trackers.merge_extend_stateless(&cmdbuf.trackers);
                        CommandBuffer::insert_barriers(
                            &mut transit,
                            &mut *trackers,
                            &cmdbuf.trackers.buffers,
                            &cmdbuf.trackers.textures,
                            &*buffer_guard,
                            &*texture_guard,
                        );
                        unsafe {
                            transit.finish();
                        }
                        cmdbuf.raw.insert(0, transit);
                    }

                    log::trace!("Device after submission {}: {:#?}", submit_index, trackers);
                    drop(trackers);
                    if !required_buffer_inits.map.is_empty() {
                        device
                            .initialize_buffer_memory(required_buffer_inits, &mut *buffer_guard)?;
                    }
                }

                // now prepare the GPU submission
                let mut fence = device
                    .raw
                    .create_fence(false)
                    .or(Err(DeviceError::OutOfMemory))?;
                let signal_semaphores = signal_swapchain_semaphores
                    .into_iter()
                    .map(|sc_id| &swap_chain_guard[sc_id].semaphore);
                //Note: we could technically avoid the heap Vec here
                let mut command_buffers = Vec::new();
                command_buffers.extend(pending_write_command_buffer.as_ref());
                for &cmd_buf_id in command_buffer_ids.iter() {
                    match command_buffer_guard.get(cmd_buf_id) {
                        Ok(cmd_buf) if cmd_buf.is_finished() => {
                            command_buffers.extend(cmd_buf.raw.iter());
                        }
                        _ => {}
                    }
                }

                unsafe {
                    device.queue_group.queues[0].submit(
                        command_buffers.into_iter(),
                        iter::empty(),
                        signal_semaphores,
                        Some(&mut fence),
                    );
                }
                fence
            };

            if let Some(comb_raw) = pending_write_command_buffer {
                device
                    .cmd_allocator
                    .after_submit_internal(comb_raw, submit_index);
            }

            let callbacks = match device.maintain(&hub, false, &mut token) {
                Ok(callbacks) => callbacks,
                Err(WaitIdleError::Device(err)) => return Err(QueueSubmitError::Queue(err)),
                Err(WaitIdleError::StuckGpu) => return Err(QueueSubmitError::StuckGpu),
            };

            profiling::scope!("cleanup");
            super::Device::lock_life_internal(&device.life_tracker, &mut token).track_submission(
                submit_index,
                fence,
                &device.temp_suspected,
                device.pending_writes.temp_resources.drain(..),
            );

            // finally, return the command buffers to the allocator
            for &cmb_id in command_buffer_ids {
                if let (Some(cmd_buf), _) = hub.command_buffers.unregister(cmb_id, &mut token) {
                    device
                        .cmd_allocator
                        .after_submit(cmd_buf, &device.raw, submit_index);
                }
            }

            callbacks
        };

        // the map callbacks should execute with nothing locked!
        drop(token);
        super::fire_map_callbacks(callbacks);

        Ok(())
    }

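    /// Returns the timestamp period of the queue, i.e. how many nanoseconds one
    /// timestamp query tick corresponds to, as reported by the backend.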
    pub fn queue_get_timestamp_period<B: GfxBackend>(
        &self,
        queue_id: id::QueueId,
    ) -> Result<f32, InvalidQueue> {
        let hub = B::hub(self);
        let mut token = Token::root();
        let (device_guard, _) = hub.devices.read(&mut token);
        match device_guard.get(queue_id) {
            Ok(device) => Ok(device.queue_group.queues[0].timestamp_period()),
            Err(_) => Err(InvalidQueue),
        }
    }
}

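/// Returns the least common multiple of `a` and `b`, computed as `a * b / gcd(a, b)`.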
fn get_lowest_common_denom(a: u32, b: u32) -> u32 {
    let gcd = if a >= b {
        get_greatest_common_divisor(a, b)
    } else {
        get_greatest_common_divisor(b, a)
    };
    a * b / gcd
}

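/// Euclid's algorithm; callers must pass the operands with `a >= b`.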
fn get_greatest_common_divisor(mut a: u32, mut b: u32) -> u32 {
    assert!(a >= b);
    loop {
        let c = a % b;
        if c == 0 {
            return b;
        } else {
            a = b;
            b = c;
        }
    }
}

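/// Rounds `value` up to the nearest multiple of `alignment`.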
fn align_to(value: u32, alignment: u32) -> u32 {
    match value % alignment {
        0 => value,
        other => value - other + alignment,
    }
}

#[test]
fn test_lcd() {
    assert_eq!(get_lowest_common_denom(2, 2), 2);
    assert_eq!(get_lowest_common_denom(2, 3), 6);
    assert_eq!(get_lowest_common_denom(6, 4), 12);
}

#[test]
fn test_gcd() {
    assert_eq!(get_greatest_common_divisor(5, 1), 1);
    assert_eq!(get_greatest_common_divisor(4, 2), 2);
    assert_eq!(get_greatest_common_divisor(6, 4), 2);
    assert_eq!(get_greatest_common_divisor(7, 7), 7);
}
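
// An added sanity check for `align_to`, mirroring the existing tests above;
// this test is not part of the original suite.
#[test]
fn test_align_to() {
    assert_eq!(align_to(0, 4), 0);
    assert_eq!(align_to(8, 4), 8);
    assert_eq!(align_to(7, 4), 8);
    assert_eq!(align_to(63, 64), 64);
}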