/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! This module contains the convoluted logic that goes into uploading content into
//! the texture cache's textures.
//!
//! We need to support various combinations of code paths depending on the quirks of
//! each hardware/driver configuration:
//! - direct upload,
//! - staged upload via a pixel buffer object,
//! - staged upload via a direct upload to a staging texture where PBOs aren't supported,
//! - copy from the staging to destination textures, either via blits or batched draw calls.
//!
//! Conceptually a lot of this logic should probably be in the device module, but some code
//! here relies on submitting draw calls via the renderer.
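//!
//! At a high level, a frame's uploads flow through the following steps: gather
//! the updates for each destination texture, upload each update either directly
//! or into a batched staging buffer, flush all uploads, then copy the staged
//! regions to their final location in the texture cache.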

use std::mem;
use std::collections::VecDeque;
use euclid::Transform3D;
use time::precise_time_ns;
use malloc_size_of::MallocSizeOfOps;
use api::units::*;
use api::{ExternalImageSource, PremultipliedColorF, ImageBufferKind, ImageRendering, ImageFormat};
use crate::renderer::{
    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
};
use crate::internal_types::{
    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
    CacheTextureId, RenderTargetInfo,
};
use crate::device::{
    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
    TextureFilter,
};
use crate::gpu_types::{ZBufferId, CompositeInstance, CompositorTransform};
use crate::batch::BatchTextures;
use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
use crate::composite::{CompositeFeatures, CompositeSurfaceFormat};
use crate::profiler;
use crate::render_api::MemoryReport;

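/// The size of the temporary textures used to batch texture cache uploads.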
pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);

/// Upload a number of items to texture cache textures.
///
/// This is the main entry point of the texture cache upload code.
/// See also the module documentation for more information.
pub fn upload_to_texture_cache(
    renderer: &mut Renderer,
    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
) {
    let mut stats = UploadStats {
        num_draw_calls: 0,
        upload_time: 0,
        cpu_buffer_alloc_time: 0,
        texture_alloc_time: 0,
        cpu_copy_time: 0,
        gpu_copy_commands_time: 0,
        bytes_uploaded: 0,
    };

    let upload_total_start = precise_time_ns();

    let mut batch_upload_textures = Vec::new();

    // A list of copies that must be performed from the temporary textures to the texture cache.
    let mut batch_upload_copies = Vec::new();

    // For each texture format, this stores a list of staging buffers
    // and a texture allocator for packing the buffers.
    let mut batch_upload_buffers = FastHashMap::default();

    // For best performance we use a single TextureUploader for all uploads.
    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
    let mut uploader = renderer.device.upload_texture(
        &mut renderer.texture_upload_pbo_pool,
    );

    let num_updates = update_list.len();

    for (texture_id, updates) in update_list {
        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
        for update in updates {
            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;
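            // Declared before the match so that a buffer created in the
            // ExternalImageSource::Invalid arm below lives long enough to be
            // borrowed by `data`.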
88
89 let dummy_data;
90 let data = match source {
91 TextureUpdateSource::Bytes { ref data } => {
92 &data[offset as usize ..]
93 }
94 TextureUpdateSource::External { id, channel_index } => {
95 let handler = renderer.external_image_handler
96 .as_mut()
97 .expect("Found external image, but no handler set!");
98 // The filter is only relevant for NativeTexture external images.
99 match handler.lock(id, channel_index, ImageRendering::Auto).source {
100 ExternalImageSource::RawData(data) => {
101 &data[offset as usize ..]
102 }
103 ExternalImageSource::Invalid => {
104 // Create a local buffer to fill the pbo.
105 let bpp = texture.get_format().bytes_per_pixel();
106 let width = stride.unwrap_or(rect.width() * bpp);
107 let total_size = width * rect.height();
108 // WR haven't support RGBAF32 format in texture_cache, so
109 // we use u8 type here.
110 dummy_data = vec![0xFFu8; total_size as usize];
111 &dummy_data
112 }
113 ExternalImageSource::NativeTexture(eid) => {
114 panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
115 }
116 }
117 }
118 TextureUpdateSource::DebugClear => {
119 let draw_target = DrawTarget::from_texture(
120 texture,
121 false,
122 );
123 renderer.device.bind_draw_target(draw_target);
124 renderer.device.clear_target(
125 Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
126 None,
127 Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
128 );
129
130 continue;
131 }
132 };
133
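            // Batching is only used for updates targeting the shared texture
            // cache, and only when they are small enough to fit in a staging
            // texture.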
            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height;

            if use_batch_upload {
                copy_into_staging_buffer(
                    &mut renderer.device,
                    &mut uploader,
                    &mut renderer.staging_texture_pool,
                    rect,
                    stride,
                    data,
                    texture_id,
                    texture,
                    &mut batch_upload_buffers,
                    &mut batch_upload_textures,
                    &mut batch_upload_copies,
                    &mut stats,
                );
            } else {
                let upload_start_time = precise_time_ns();

                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    rect,
                    stride,
                    format_override,
                    data.as_ptr(),
                    data.len()
                );

                stats.upload_time += precise_time_ns() - upload_start_time;
            }

            if let TextureUpdateSource::External { id, channel_index } = source {
                let handler = renderer.external_image_handler
                    .as_mut()
                    .expect("Found external image, but no handler set!");
                handler.unlock(id, channel_index);
            }
        }
    }

    let upload_start_time = precise_time_ns();
    // Upload batched texture updates to their temporary textures.
    for batch_buffer in batch_upload_buffers.into_iter().flat_map(|(_, (_, buffers))| buffers) {
        let texture = &batch_upload_textures[batch_buffer.texture_index];
        match batch_buffer.staging_buffer {
            StagingBufferKind::Pbo(pbo) => {
                stats.bytes_uploaded += uploader.upload_staged(
                    &mut renderer.device,
                    texture,
                    DeviceIntRect::from_size(texture.get_dimensions()),
                    None,
                    pbo,
                );
            }
            StagingBufferKind::CpuBuffer { bytes, .. } => {
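                // The CPU buffer is laid out with the stride of the full staging
                // texture width, and only upload_rect contains valid data.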
                let bpp = texture.get_format().bytes_per_pixel();
                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    batch_buffer.upload_rect,
                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
                    None,
                    bytes.as_ptr(),
                    bytes.len()
                );
                renderer.staging_texture_pool.return_temporary_buffer(bytes);
            }
        }
    }
    stats.upload_time += precise_time_ns() - upload_start_time;

    // Flush all uploads, batched or otherwise.
    let flush_start_time = precise_time_ns();
    uploader.flush(&mut renderer.device);
    stats.upload_time += precise_time_ns() - flush_start_time;

    if !batch_upload_copies.is_empty() {
        // Copy updates that were batch uploaded to their correct destination in the texture cache.
        // Sort them by destination and source to minimize framebuffer binding changes.
        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));

        let gpu_copy_start = precise_time_ns();

        if renderer.device.use_draw_calls_for_texture_copy() {
            // Some drivers have a very high CPU overhead when submitting hundreds of small blit
            // commands (low end Intel drivers on Windows, for example, can take 100+ ms submitting a
            // few hundred blits). In this case we do the copy with batched draw calls.
            copy_from_staging_to_cache_using_draw_calls(
                renderer,
                &mut stats,
                &batch_upload_textures,
                batch_upload_copies,
            );
        } else {
            copy_from_staging_to_cache(
                renderer,
                &batch_upload_textures,
                batch_upload_copies,
            );
        }

        stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start;
    }

    for texture in batch_upload_textures.drain(..) {
        renderer.staging_texture_pool.return_texture(texture);
    }

    // Update the profile counters. We use add instead of set because
    // this function can be called several times per frame.
    // We don't update the counters when their value is zero, so that
    // the profiler can treat them as events and we can get notified
    // when they happen.

    let upload_total = precise_time_ns() - upload_total_start;
    renderer.profile.add(
        profiler::TOTAL_UPLOAD_TIME,
        profiler::ns_to_ms(upload_total)
    );

    if num_updates > 0 {
        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
    }

    if stats.bytes_uploaded > 0 {
        renderer.profile.add(
            profiler::TEXTURE_UPLOADS_MEM,
            profiler::bytes_to_mb(stats.bytes_uploaded)
        );
    }

    if stats.cpu_copy_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_CPU_COPY_TIME,
            profiler::ns_to_ms(stats.cpu_copy_time)
        );
    }
    if stats.upload_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_TIME,
            profiler::ns_to_ms(stats.upload_time)
        );
    }
    if stats.texture_alloc_time > 0 {
        renderer.profile.add(
            profiler::STAGING_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.texture_alloc_time)
        );
    }
    if stats.cpu_buffer_alloc_time > 0 {
        renderer.profile.add(
            profiler::CPU_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
        );
    }
    if stats.num_draw_calls > 0 {
        renderer.profile.add(
            profiler::UPLOAD_NUM_COPY_BATCHES,
            stats.num_draw_calls
        );
    }

    if stats.gpu_copy_commands_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_GPU_COPY_TIME,
            profiler::ns_to_ms(stats.gpu_copy_commands_time)
        );
    }
}

/// Copy an item into a batched upload staging buffer.
fn copy_into_staging_buffer<'a>(
    device: &mut Device,
    uploader: &mut TextureUploader<'a>,
    staging_texture_pool: &mut UploadTexturePool,
    update_rect: DeviceIntRect,
    update_stride: Option<i32>,
    data: &[u8],
    dest_texture_id: CacheTextureId,
    texture: &Texture,
    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
    batch_upload_textures: &mut Vec<Texture>,
    batch_upload_copies: &mut Vec<BatchUploadCopy>,
    stats: &mut UploadStats
) {
    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));

    // Allocate a region within the staging buffer for this update. If there is
    // no room in an existing buffer then allocate another texture and buffer.
    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
        Some((slice, origin)) => (slice, origin),
        None => {
            let new_slice = FreeRectSlice(buffers.len() as u32);
            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());

            let texture_alloc_time_start = precise_time_ns();
            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
            stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;

            let texture_index = batch_upload_textures.len();
            batch_upload_textures.push(staging_texture);

            let cpu_buffer_alloc_start_time = precise_time_ns();
            let staging_buffer = match device.upload_method() {
                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
                    bytes: staging_texture_pool.get_temporary_buffer(),
                },
                UploadMethod::PixelBuffer(_) => {
                    let pbo = uploader.stage(
                        device,
                        texture.get_format(),
                        BATCH_UPLOAD_TEXTURE_SIZE,
                    ).unwrap();

                    StagingBufferKind::Pbo(pbo)
                }
            };
            stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time;

            buffers.push(BatchUploadBuffer {
                staging_buffer,
                texture_index,
                upload_rect: DeviceIntRect::zero()
            });

            (new_slice, DeviceIntPoint::zero())
        }
    };
    let buffer = &mut buffers[slice.0 as usize];
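    // Grow this buffer's upload rect to include the newly allocated region, so
    // that the subsequent upload covers only the used part of the staging texture.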
    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);

    batch_upload_copies.push(BatchUploadCopy {
        src_texture_index: buffer.texture_index,
        src_offset: allocated_rect.min,
        dest_texture_id,
        dest_offset: update_rect.min,
        size: update_rect.size(),
    });

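    // Safety: reinterpreting the initialized `data` bytes as MaybeUninit<u8> is
    // always valid, and we only ever write (never read) through `dst`, which may
    // be an uninitialized PBO mapping.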
    unsafe {
        let memcpy_start_time = precise_time_ns();
        let bpp = texture.get_format().bytes_per_pixel() as usize;
        let width_bytes = update_rect.width() as usize * bpp;
        let src_stride = update_stride.map_or(width_bytes, |stride| {
            assert!(stride >= 0);
            stride as usize
        });
        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
        assert!(src_size <= data.len());

        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
        let (dst_stride, dst) = match &mut buffer.staging_buffer {
            StagingBufferKind::Pbo(buffer) => (
                buffer.get_stride(),
                buffer.get_mapping(),
            ),
            StagingBufferKind::CpuBuffer { bytes } => (
                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
                &mut bytes[..],
            )
        };

        // Copy the data line by line into the buffer so that we do not overwrite
        // any other region of the buffer.
        for y in 0..allocated_rect.height() as usize {
            let src_start = y * src_stride;
            let src_end = src_start + width_bytes;
            let dst_start = (allocated_rect.min.y as usize + y) * dst_stride +
                allocated_rect.min.x as usize * bpp;
            let dst_end = dst_start + width_bytes;

            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
        }

        stats.cpu_copy_time += precise_time_ns() - memcpy_start_time;
    }
}

/// Copy from the staging PBOs or textures to texture cache textures using blit commands.
///
/// Using blits instead of draw calls is supposedly more efficient but some drivers have
/// a very high per-command overhead, so in some configurations we end up using
/// copy_from_staging_to_cache_using_draw_calls instead.
fn copy_from_staging_to_cache(
    renderer: &mut Renderer,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    for copy in batch_upload_copies {
        let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;

        renderer.device.copy_texture_sub_region(
            &batch_upload_textures[copy.src_texture_index],
            copy.src_offset.x as _,
            copy.src_offset.y as _,
            dest_texture,
            copy.dest_offset.x as _,
            copy.dest_offset.y as _,
            copy.size.width as _,
            copy.size.height as _,
        );
    }
}

/// Generate and submit composite shader batches to copy from
/// the staging textures to the destination cache textures.
///
/// If this shows up in GPU time profiles we could replace it with
/// a simpler shader (composite.glsl is already quite simple).
fn copy_from_staging_to_cache_using_draw_calls(
    renderer: &mut Renderer,
    stats: &mut UploadStats,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
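    // draw_instanced_batch records into a RendererStats; these copies are
    // accounted for separately in UploadStats, so hand it a throwaway.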
    let mut dummy_stats = RendererStats {
        total_draw_calls: 0,
        alpha_target_count: 0,
        color_target_count: 0,
        texture_upload_mb: 0.0,
        resource_upload_time: 0.0,
        gpu_cache_upload_time: 0.0,
        gecko_display_list_time: 0.0,
        wr_display_list_time: 0.0,
        scene_build_time: 0.0,
        frame_build_time: 0.0,
        full_display_list: false,
        full_paint: false,
    };

    let mut copy_instances = Vec::new();
    let mut prev_src = None;
    let mut prev_dst = None;
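
    // Accumulate copies into instanced draw calls, flushing the batch whenever
    // the source or destination texture changes.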
    for copy in batch_upload_copies {
        let src_changed = prev_src != Some(copy.src_texture_index);
        let dst_changed = prev_dst != Some(copy.dest_texture_id);

        if (src_changed || dst_changed) && !copy_instances.is_empty() {
            renderer.draw_instanced_batch(
                &copy_instances,
                VertexArrayKind::Composite,
                // We bind the staging texture manually because it isn't known
                // to the texture resolver.
                &BatchTextures::empty(),
                &mut dummy_stats,
            );

            stats.num_draw_calls += 1;
            copy_instances.clear();
        }

        if dst_changed {
            let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
            let target_size = dest_texture.get_dimensions();

            let draw_target = DrawTarget::from_texture(
                dest_texture,
                false,
            );
            renderer.device.bind_draw_target(draw_target);

            let projection = Transform3D::ortho(
                0.0,
                target_size.width as f32,
                0.0,
                target_size.height as f32,
                renderer.device.ortho_near_plane(),
                renderer.device.ortho_far_plane(),
            );

            renderer.shaders
                .borrow_mut()
                .get_composite_shader(
                    CompositeSurfaceFormat::Rgba,
                    ImageBufferKind::Texture2D,
                    CompositeFeatures::empty(),
                ).bind(
                    &mut renderer.device,
                    &projection,
                    None,
                    &mut renderer.renderer_errors
                );

            prev_dst = Some(copy.dest_texture_id);
        }

        if src_changed {
            renderer.device.bind_texture(
                TextureSampler::Color0,
                &batch_upload_textures[copy.src_texture_index],
                Swizzle::default(),
            );

            prev_src = Some(copy.src_texture_index);
        }

        let dest_rect = DeviceRect::from_origin_and_size(
            copy.dest_offset.to_f32(),
            copy.size.to_f32(),
        );

        let src_rect = TexelRect::new(
            copy.src_offset.x as f32,
            copy.src_offset.y as f32,
            (copy.src_offset.x + copy.size.width) as f32,
            (copy.src_offset.y + copy.size.height) as f32,
        );

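        // Express the copy as a composite instance drawing the staging rect
        // into the destination rect at the same size (no scaling).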
        copy_instances.push(CompositeInstance::new_rgb(
            dest_rect.cast_unit(),
            dest_rect,
            PremultipliedColorF::WHITE,
            ZBufferId(0),
            src_rect,
            CompositorTransform::identity(),
        ));
    }

    if !copy_instances.is_empty() {
        renderer.draw_instanced_batch(
            &copy_instances,
            VertexArrayKind::Composite,
            // We bind the staging texture manually because it isn't known
            // to the texture resolver.
            &BatchTextures::empty(),
            &mut dummy_stats,
        );

        stats.num_draw_calls += 1;
    }
}

/// A very basic pool to avoid reallocating staging textures as well as staging
/// CPU side buffers.
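///
/// A minimal sketch of the intended per-frame lifecycle (see the renderer for
/// the real call sites):
///
/// ```ignore
/// pool.begin_frame();
/// let tex = pool.get_texture(device, ImageFormat::RGBA8);
/// // ... upload into `tex` and copy from it ...
/// pool.return_texture(tex);
/// pool.end_frame(device);
/// ```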
pub struct UploadTexturePool {
    /// The textures in the pool associated with a last used frame index.
    ///
    /// The outer array corresponds to each of the three supported texture formats.
    textures: [VecDeque<(Texture, u64)>; 3],
    // Frame at which to deallocate some textures if there are too many in the pool,
    // for each format.
    delay_texture_deallocation: [u64; 3],
    current_frame: u64,

    /// Temporary buffers that are used when using staging uploads + glTexImage2D.
    ///
    /// Temporary buffers aren't used asynchronously so they can be reused every frame.
    /// To keep things simple we always allocate enough memory for formats with four bytes
    /// per pixel (more than we need for alpha-only textures but it works just as well).
    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
    used_temporary_buffers: usize,
    delay_buffer_deallocation: u64,
}

impl UploadTexturePool {
    pub fn new() -> Self {
        UploadTexturePool {
            textures: [VecDeque::new(), VecDeque::new(), VecDeque::new()],
            delay_texture_deallocation: [0; 3],
            current_frame: 0,
            temporary_buffers: Vec::new(),
            used_temporary_buffers: 0,
            delay_buffer_deallocation: 0,
        }
    }

    fn format_index(&self, format: ImageFormat) -> usize {
        match format {
            ImageFormat::RGBA8 => 0,
            ImageFormat::BGRA8 => 1,
            ImageFormat::R8 => 2,
            _ => { panic!("unexpected format"); }
        }
    }

    pub fn begin_frame(&mut self) {
        self.current_frame += 1;
    }

    /// Create or reuse a staging texture.
    ///
    /// See also return_texture.
    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {
        // First try to reuse a texture from the pool.
        // "Available" here means it hasn't been used for 2 frames, to avoid stalls.
        // No need to scan the vector. Newer textures are always pushed at the back
        // of the vector so we know the first element is the least recently used.
        let format_idx = self.format_index(format);
        let can_reuse = self.textures[format_idx].front()
            .map(|tex| self.current_frame - tex.1 > 2)
            .unwrap_or(false);

        if can_reuse {
            return self.textures[format_idx].pop_front().unwrap().0;
        }

        // If we couldn't find an available texture, create a new one.
        device.create_texture(
            ImageBufferKind::Texture2D,
            format,
            BATCH_UPLOAD_TEXTURE_SIZE.width,
            BATCH_UPLOAD_TEXTURE_SIZE.height,
            TextureFilter::Nearest,
            // Currently we need render target support as we always use glBlitFramebuffer
            // to copy the texture data. Instead, we should use glCopyImageSubData on some
            // platforms, and avoid creating the FBOs in that case.
            Some(RenderTargetInfo { has_depth: false }),
        )
    }

    /// Hand the staging texture back to the pool after being done with uploads.
    ///
    /// The texture must have been obtained from this pool via get_texture.
    pub fn return_texture(&mut self, texture: Texture) {
        let format_idx = self.format_index(texture.get_format());
        self.textures[format_idx].push_back((texture, self.current_frame));
    }

    /// Create or reuse a temporary CPU buffer.
    ///
    /// These buffers are used in the batched upload path when PBOs are not supported.
    /// Content is first written to the temporary buffer and uploaded via a single
    /// glTexSubImage2D call.
    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
        self.used_temporary_buffers += 1;
        self.temporary_buffers.pop().unwrap_or_else(|| {
            vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]
        })
    }

    /// Return memory that was obtained from this pool via get_temporary_buffer.
    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);
        self.temporary_buffers.push(buffer);
    }

    /// Deallocate this pool's CPU and GPU memory.
    pub fn delete_textures(&mut self, device: &mut Device) {
        for format in &mut self.textures {
            while let Some(texture) = format.pop_back() {
                device.delete_texture(texture.0)
            }
        }
        self.temporary_buffers.clear();
    }

    /// Deallocate some textures if there are too many for a long time.
    pub fn end_frame(&mut self, device: &mut Device) {
        for format_idx in 0..self.textures.len() {
            // Count the number of reusable staging textures. If the count stays
            // high for a large number of frames, truncate the pool back to 8-ish
            // textures over multiple frames.
            let mut num_reusable_textures = 0;
            for texture in &self.textures[format_idx] {
                if self.current_frame - texture.1 > 2 {
                    num_reusable_textures += 1;
                }
            }

            if num_reusable_textures < 8 {
                // Don't deallocate textures for another 120 frames.
                self.delay_texture_deallocation[format_idx] = self.current_frame + 120;
            }

            // Deallocate up to 4 staging textures every frame.
            let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {
                num_reusable_textures.min(4)
            } else {
                0
            };

            for _ in 0..to_remove {
                let texture = self.textures[format_idx].pop_front().unwrap().0;
                device.delete_texture(texture);
            }
        }

        // Similar logic for temporary CPU buffers.
        let unused_buffers = self.temporary_buffers.len() - self.used_temporary_buffers;
        if unused_buffers < 8 {
            self.delay_buffer_deallocation = self.current_frame + 120;
        }
        let to_remove = if self.current_frame > self.delay_buffer_deallocation {
            unused_buffers.min(4)
        } else {
            0
        };
        for _ in 0..to_remove {
            // Unlike textures it doesn't matter whether we pop from the front or back
            // of the vector.
            self.temporary_buffers.pop();
        }
        self.used_temporary_buffers = 0;
    }

    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        for buf in &self.temporary_buffers {
            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };
        }

        for format in &self.textures {
            for texture in format {
                report.upload_staging_textures += texture.0.size_in_bytes();
            }
        }
    }
}

struct UploadStats {
    num_draw_calls: u32,
    upload_time: u64,
    cpu_buffer_alloc_time: u64,
    texture_alloc_time: u64,
    cpu_copy_time: u64,
    gpu_copy_commands_time: u64,
    bytes_uploaded: usize,
}

#[derive(Debug)]
enum StagingBufferKind<'a> {
    Pbo(UploadStagingBuffer<'a>),
    CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> }
}

#[derive(Debug)]
struct BatchUploadBuffer<'a> {
    staging_buffer: StagingBufferKind<'a>,
    texture_index: usize,
    // A rectangle containing all items going into this staging texture, so
    // that we can avoid uploading the entire area if we are using glTexSubImage2D.
    upload_rect: DeviceIntRect,
}

// On some devices performing many small texture uploads is slow, so instead we batch
// updates into a small number of uploads to temporary textures, then copy from those
// textures to the correct place in the texture cache.
//
// Each of these records one copy from a temporary (staging) texture to its final
// destination in the texture cache.
#[derive(Debug)]
struct BatchUploadCopy {
    // Index within batch_upload_textures.
    src_texture_index: usize,
    src_offset: DeviceIntPoint,
    dest_texture_id: CacheTextureId,
    dest_offset: DeviceIntPoint,
    size: DeviceIntSize,
}