/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! This module contains the convoluted logic that goes into uploading content into
//! the texture cache's textures.
//!
//! We need to support various combinations of code paths depending on the quirks of
//! each hardware/driver configuration:
//! - direct upload,
//! - staged upload via a pixel buffer object,
//! - staged upload via a direct upload to a staging texture where PBOs aren't supported,
//! - copy from the staging to destination textures, either via blits or batched draw calls.
//!
//! Conceptually a lot of this logic should probably be in the device module, but some code
//! here relies on submitting draw calls via the renderer.
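//!
//! As a rough sketch (simplified from the actual checks made in
//! `upload_to_texture_cache` below), the choice between the direct and batched
//! paths looks like this:
//!
//! ```ignore
//! let use_batch_upload = device.use_batched_texture_uploads()
//!     && texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE)
//!     && rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width
//!     && rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height;
//!
//! if use_batch_upload {
//!     // Stage the data into a temporary texture (via a PBO, or a CPU buffer when
//!     // PBOs aren't supported), then copy it to its destination with either blits
//!     // or batched draw calls (see use_draw_calls_for_texture_copy).
//! } else {
//!     // Upload directly to the destination texture.
//! }
//! ```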


use std::mem;
use std::collections::VecDeque;
use euclid::Transform3D;
use time::precise_time_ns;
use malloc_size_of::MallocSizeOfOps;
use api::units::*;
use api::{ExternalImageSource, PremultipliedColorF, ImageBufferKind, ImageRendering, ImageFormat};
use crate::renderer::{
    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
};
use crate::internal_types::{
    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
    CacheTextureId, RenderTargetInfo,
};
use crate::device::{
    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
    TextureFilter,
};
use crate::gpu_types::{ZBufferId, CompositeInstance, CompositorTransform};
use crate::batch::BatchTextures;
use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
use crate::composite::{CompositeFeatures, CompositeSurfaceFormat};
use crate::profiler;
use crate::render_api::MemoryReport;

pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);

/// Upload a number of items to texture cache textures.
///
/// This is the main entry point of the texture cache upload code.
/// See also the module documentation for more information.
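///
/// A minimal calling sketch (`renderer`, `texture_id` and `update` here are
/// placeholders; in practice the update lists are assembled by the texture cache):
///
/// ```ignore
/// let mut update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>> =
///     FastHashMap::default();
/// update_list.entry(texture_id).or_insert_with(Vec::new).push(update);
///
/// upload_to_texture_cache(&mut renderer, update_list);
/// ```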
pub fn upload_to_texture_cache(
    renderer: &mut Renderer,
    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
) {

    let mut stats = UploadStats {
        num_draw_calls: 0,
        upload_time: 0,
        cpu_buffer_alloc_time: 0,
        texture_alloc_time: 0,
        cpu_copy_time: 0,
        gpu_copy_commands_time: 0,
        bytes_uploaded: 0,
    };

    let upload_total_start = precise_time_ns();

    let mut batch_upload_textures = Vec::new();

    // A list of copies that must be performed from the temporary textures to the texture cache.
    let mut batch_upload_copies = Vec::new();

    // For each texture format, this stores a list of staging buffers
    // and a texture allocator for packing the buffers.
    let mut batch_upload_buffers = FastHashMap::default();

    // For best performance we use a single TextureUploader for all uploads.
    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
    let mut uploader = renderer.device.upload_texture(
        &mut renderer.texture_upload_pbo_pool,
    );

    let num_updates = update_list.len();

    for (texture_id, updates) in update_list {
        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
        for update in updates {
            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;

            let dummy_data;
            let data = match source {
                TextureUpdateSource::Bytes { ref data } => {
                    &data[offset as usize ..]
                }
                TextureUpdateSource::External { id, channel_index } => {
                    let handler = renderer.external_image_handler
                        .as_mut()
                        .expect("Found external image, but no handler set!");
                    // The filter is only relevant for NativeTexture external images.
                    match handler.lock(id, channel_index, ImageRendering::Auto).source {
                        ExternalImageSource::RawData(data) => {
                            &data[offset as usize ..]
                        }
                        ExternalImageSource::Invalid => {
                            // Create a local buffer to fill the pbo.
                            let bpp = texture.get_format().bytes_per_pixel();
                            let width = stride.unwrap_or(rect.width() * bpp);
                            let total_size = width * rect.height();
                            // WR doesn't support the RGBAF32 format in the texture cache, so
                            // we use the u8 type here.
                            dummy_data = vec![0xFFu8; total_size as usize];
                            &dummy_data
                        }
                        ExternalImageSource::NativeTexture(eid) => {
                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
                        }
                    }
                }
                TextureUpdateSource::DebugClear => {
                    let draw_target = DrawTarget::from_texture(
                        texture,
                        false,
                    );
                    renderer.device.bind_draw_target(draw_target);
                    renderer.device.clear_target(
                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
                        None,
                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
                    );

                    continue;
                }
            };

            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height;

            if use_batch_upload {
                copy_into_staging_buffer(
                    &mut renderer.device,
                    &mut uploader,
                    &mut renderer.staging_texture_pool,
                    rect,
                    stride,
                    data,
                    texture_id,
                    texture,
                    &mut batch_upload_buffers,
                    &mut batch_upload_textures,
                    &mut batch_upload_copies,
                    &mut stats,
                );
            } else {
                let upload_start_time = precise_time_ns();

                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    rect,
                    stride,
                    format_override,
                    data.as_ptr(),
                    data.len()
                );

                stats.upload_time += precise_time_ns() - upload_start_time;
            }

            if let TextureUpdateSource::External { id, channel_index } = source {
                let handler = renderer.external_image_handler
                    .as_mut()
                    .expect("Found external image, but no handler set!");
                handler.unlock(id, channel_index);
            }
        }
    }

    let upload_start_time = precise_time_ns();
    // Upload batched texture updates to their temporary textures.
    for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {
        let texture = &batch_upload_textures[batch_buffer.texture_index];
        match batch_buffer.staging_buffer {
            StagingBufferKind::Pbo(pbo) => {
                stats.bytes_uploaded += uploader.upload_staged(
                    &mut renderer.device,
                    texture,
                    DeviceIntRect::from_size(texture.get_dimensions()),
                    None,
                    pbo,
                );
            }
            StagingBufferKind::CpuBuffer { bytes, .. } => {
                let bpp = texture.get_format().bytes_per_pixel();
                stats.bytes_uploaded += uploader.upload(
                    &mut renderer.device,
                    texture,
                    batch_buffer.upload_rect,
                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
                    None,
                    bytes.as_ptr(),
                    bytes.len()
                );
                renderer.staging_texture_pool.return_temporary_buffer(bytes);
            }
        }
    }
    stats.upload_time += precise_time_ns() - upload_start_time;


    // Flush all uploads, batched or otherwise.
    let flush_start_time = precise_time_ns();
    uploader.flush(&mut renderer.device);
    stats.upload_time += precise_time_ns() - flush_start_time;

    if !batch_upload_copies.is_empty() {
        // Copy updates that were batch uploaded to their correct destination in the texture cache.
        // Sort them by destination and source to minimize framebuffer binding changes.
        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));

        let gpu_copy_start = precise_time_ns();

        if renderer.device.use_draw_calls_for_texture_copy() {
            // Some drivers have a very high CPU overhead when submitting hundreds of small blit
            // commands (low end intel drivers on Windows for example can take 100+ ms submitting a
            // few hundred blits). In this case we do the copy with batched draw calls.
            copy_from_staging_to_cache_using_draw_calls(
                renderer,
                &mut stats,
                &batch_upload_textures,
                batch_upload_copies,
            );
        } else {
            copy_from_staging_to_cache(
                renderer,
                &batch_upload_textures,
                batch_upload_copies,
            );
        }

        stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start;
    }

    for texture in batch_upload_textures.drain(..) {
        renderer.staging_texture_pool.return_texture(texture);
    }

    // Update the profile counters. We use add instead of set because
    // this function can be called several times per frame.
    // We don't update the counters when their value is zero, so that
    // the profiler can treat them as events and we can get notified
    // when they happen.

    let upload_total = precise_time_ns() - upload_total_start;
    renderer.profile.add(
        profiler::TOTAL_UPLOAD_TIME,
        profiler::ns_to_ms(upload_total)
    );

    if num_updates > 0 {
        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
    }

    if stats.bytes_uploaded > 0 {
        renderer.profile.add(
            profiler::TEXTURE_UPLOADS_MEM,
            profiler::bytes_to_mb(stats.bytes_uploaded)
        );
    }

    if stats.cpu_copy_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_CPU_COPY_TIME,
            profiler::ns_to_ms(stats.cpu_copy_time)
        );
    }
    if stats.upload_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_TIME,
            profiler::ns_to_ms(stats.upload_time)
        );
    }
    if stats.texture_alloc_time > 0 {
        renderer.profile.add(
            profiler::STAGING_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.texture_alloc_time)
        );
    }
    if stats.cpu_buffer_alloc_time > 0 {
        renderer.profile.add(
            profiler::CPU_TEXTURE_ALLOCATION_TIME,
            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
        );
    }
    if stats.num_draw_calls > 0 {
        renderer.profile.add(
            profiler::UPLOAD_NUM_COPY_BATCHES,
            stats.num_draw_calls
        );
    }

    if stats.gpu_copy_commands_time > 0 {
        renderer.profile.add(
            profiler::UPLOAD_GPU_COPY_TIME,
            profiler::ns_to_ms(stats.gpu_copy_commands_time)
        );
    }
}

/// Copy an item into a batched upload staging buffer.
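///
/// The staging region is allocated with a GuillotineAllocator and the data is then
/// copied row by row. As a sketch of the offset arithmetic used below (not the exact
/// code), for row `y` of the update:
///
/// ```ignore
/// let src_start = y * src_stride;                  // src_stride defaults to width * bpp
/// let dst_start = (allocated_rect.min.y as usize + y) * dst_stride
///     + allocated_rect.min.x as usize * bpp;       // dst_stride is the staging buffer stride
/// // width_bytes = update_rect.width() * bpp bytes are copied from src to dst.
/// ```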
fn copy_into_staging_buffer<'a>(
    device: &mut Device,
    uploader: &mut TextureUploader<'a>,
    staging_texture_pool: &mut UploadTexturePool,
    update_rect: DeviceIntRect,
    update_stride: Option<i32>,
    data: &[u8],
    dest_texture_id: CacheTextureId,
    texture: &Texture,
    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
    batch_upload_textures: &mut Vec<Texture>,
    batch_upload_copies: &mut Vec<BatchUploadCopy>,
    stats: &mut UploadStats
) {
    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));

    // Allocate a region within the staging buffer for this update. If there is
    // no room in an existing buffer then allocate another texture and buffer.
    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
        Some((slice, origin)) => (slice, origin),
        None => {
            let new_slice = FreeRectSlice(buffers.len() as u32);
            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());

            let texture_alloc_time_start = precise_time_ns();
            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
            stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;

            let texture_index = batch_upload_textures.len();
            batch_upload_textures.push(staging_texture);

            let cpu_buffer_alloc_start_time = precise_time_ns();
            let staging_buffer = match device.upload_method() {
                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
                    bytes: staging_texture_pool.get_temporary_buffer(),
                },
                UploadMethod::PixelBuffer(_) => {
                    let pbo = uploader.stage(
                        device,
                        texture.get_format(),
                        BATCH_UPLOAD_TEXTURE_SIZE,
                    ).unwrap();

                    StagingBufferKind::Pbo(pbo)
                }
            };
            stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time;

            buffers.push(BatchUploadBuffer {
                staging_buffer,
                texture_index,
                upload_rect: DeviceIntRect::zero()
            });

            (new_slice, DeviceIntPoint::zero())
        }
    };
    let buffer = &mut buffers[slice.0 as usize];
    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);

    batch_upload_copies.push(BatchUploadCopy {
        src_texture_index: buffer.texture_index,
        src_offset: allocated_rect.min,
        dest_texture_id,
        dest_offset: update_rect.min,
        size: update_rect.size(),
    });

    unsafe {
        let memcpy_start_time = precise_time_ns();
        let bpp = texture.get_format().bytes_per_pixel() as usize;
        let width_bytes = update_rect.width() as usize * bpp;
        let src_stride = update_stride.map_or(width_bytes, |stride| {
            assert!(stride >= 0);
            stride as usize
        });
        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
        assert!(src_size <= data.len());

        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
        let (dst_stride, dst) = match &mut buffer.staging_buffer {
            StagingBufferKind::Pbo(buffer) => (
                buffer.get_stride(),
                buffer.get_mapping(),
            ),
            StagingBufferKind::CpuBuffer { bytes } => (
                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
                &mut bytes[..],
            )
        };

        // Copy the data line-by-line into the buffer so that we do not overwrite
        // any other region of the buffer.
        for y in 0..allocated_rect.height() as usize {
            let src_start = y * src_stride;
            let src_end = src_start + width_bytes;
            let dst_start = (allocated_rect.min.y as usize + y as usize) * dst_stride +
                allocated_rect.min.x as usize * bpp;
            let dst_end = dst_start + width_bytes;

            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
        }

        stats.cpu_copy_time += precise_time_ns() - memcpy_start_time;
    }
}


/// Copy from the staging PBOs or textures to texture cache textures using blit commands.
///
/// Using blits instead of draw calls is supposedly more efficient but some drivers have
/// a very high per-command overhead so in some configurations we end up using
/// copy_from_staging_to_cache_using_draw_calls instead.
fn copy_from_staging_to_cache(
    renderer: &mut Renderer,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    for copy in batch_upload_copies {
        let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;

        renderer.device.copy_texture_sub_region(
            &batch_upload_textures[copy.src_texture_index],
            copy.src_offset.x as _,
            copy.src_offset.y as _,
            dest_texture,
            copy.dest_offset.x as _,
            copy.dest_offset.y as _,
            copy.size.width as _,
            copy.size.height as _,
        );
    }
}

/// Generate and submit composite shader batches to copy from
/// the staging textures to the destination cache textures.
///
/// If this shows up in GPU time profiles we could replace it with
/// a simpler shader (composite.glsl is already quite simple).
fn copy_from_staging_to_cache_using_draw_calls(
    renderer: &mut Renderer,
    stats: &mut UploadStats,
    batch_upload_textures: &[Texture],
    batch_upload_copies: Vec<BatchUploadCopy>,
) {
    let mut dummy_stats = RendererStats {
        total_draw_calls: 0,
        alpha_target_count: 0,
        color_target_count: 0,
        texture_upload_mb: 0.0,
        resource_upload_time: 0.0,
        gpu_cache_upload_time: 0.0,
        gecko_display_list_time: 0.0,
        wr_display_list_time: 0.0,
        scene_build_time: 0.0,
        frame_build_time: 0.0,
        full_display_list: false,
        full_paint: false,
    };

    let mut copy_instances = Vec::new();
    let mut prev_src = None;
    let mut prev_dst = None;

    for copy in batch_upload_copies {

        let src_changed = prev_src != Some(copy.src_texture_index);
        let dst_changed = prev_dst != Some(copy.dest_texture_id);

        if (src_changed || dst_changed) && !copy_instances.is_empty() {

            renderer.draw_instanced_batch(
                &copy_instances,
                VertexArrayKind::Composite,
                // We bind the staging texture manually because it isn't known
                // to the texture resolver.
                &BatchTextures::empty(),
                &mut dummy_stats,
            );

            stats.num_draw_calls += 1;
            copy_instances.clear();
        }

        if dst_changed {
            let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
            let target_size = dest_texture.get_dimensions();

            let draw_target = DrawTarget::from_texture(
                dest_texture,
                false,
            );
            renderer.device.bind_draw_target(draw_target);

            let projection = Transform3D::ortho(
                0.0,
                target_size.width as f32,
                0.0,
                target_size.height as f32,
                renderer.device.ortho_near_plane(),
                renderer.device.ortho_far_plane(),
            );

            renderer.shaders
                .borrow_mut()
                .get_composite_shader(
                    CompositeSurfaceFormat::Rgba,
                    ImageBufferKind::Texture2D,
                    CompositeFeatures::empty(),
                ).bind(
                    &mut renderer.device,
                    &projection,
                    None,
                    &mut renderer.renderer_errors
                );

            prev_dst = Some(copy.dest_texture_id);
        }

        if src_changed {
            renderer.device.bind_texture(
                TextureSampler::Color0,
                &batch_upload_textures[copy.src_texture_index],
                Swizzle::default(),
            );

            prev_src = Some(copy.src_texture_index);
        }

        let dest_rect = DeviceRect::from_origin_and_size(
            copy.dest_offset.to_f32(),
            copy.size.to_f32(),
        );

        let src_rect = TexelRect::new(
            copy.src_offset.x as f32,
            copy.src_offset.y as f32,
            (copy.src_offset.x + copy.size.width) as f32,
            (copy.src_offset.y + copy.size.height) as f32,
        );

        copy_instances.push(CompositeInstance::new_rgb(
            dest_rect.cast_unit(),
            dest_rect,
            PremultipliedColorF::WHITE,
            ZBufferId(0),
            src_rect,
            CompositorTransform::identity(),
        ));
    }

    if !copy_instances.is_empty() {
        renderer.draw_instanced_batch(
            &copy_instances,
            VertexArrayKind::Composite,
            // We bind the staging texture manually because it isn't known
            // to the texture resolver.
            &BatchTextures::empty(),
            &mut dummy_stats,
        );

        stats.num_draw_calls += 1;
    }
}

/// A very basic pool to avoid reallocating staging textures as well as staging
/// CPU side buffers.
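///
/// A rough per-frame usage sketch (error handling and the actual upload calls
/// omitted):
///
/// ```ignore
/// let mut pool = UploadTexturePool::new();
///
/// pool.begin_frame();
/// let staging = pool.get_texture(&mut device, ImageFormat::RGBA8);
/// // ... upload into `staging`, then copy it into the texture cache ...
/// pool.return_texture(staging);
/// pool.end_frame(&mut device);
///
/// // On shutdown:
/// pool.delete_textures(&mut device);
/// ```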
pub struct UploadTexturePool {
    /// The textures in the pool associated with a last used frame index.
    ///
    /// The outer array corresponds to each of the three supported texture formats.
    textures: [VecDeque<(Texture, u64)>; 3],
    // Frame at which to deallocate some textures if there are too many in the pool,
    // for each format.
    delay_texture_deallocation: [u64; 3],
    current_frame: u64,

    /// Temporary buffers that are used when using staging uploads + glTexImage2D.
    ///
    /// Temporary buffers aren't used asynchronously so they can be reused every frame.
    /// To keep things simple we always allocate enough memory for formats with four bytes
    /// per pixel (more than we need for alpha-only textures but it works just as well).
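    /// With the 512x512 BATCH_UPLOAD_TEXTURE_SIZE used here, that comes to
    /// 512 * 512 * 4 bytes = 1 MiB per buffer.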
    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
    used_temporary_buffers: usize,
    delay_buffer_deallocation: u64,
}

impl UploadTexturePool {
    pub fn new() -> Self {
        UploadTexturePool {
            textures: [VecDeque::new(), VecDeque::new(), VecDeque::new()],
            delay_texture_deallocation: [0; 3],
            current_frame: 0,
            temporary_buffers: Vec::new(),
            used_temporary_buffers: 0,
            delay_buffer_deallocation: 0,
        }
    }

    fn format_index(&self, format: ImageFormat) -> usize {
        match format {
            ImageFormat::RGBA8 => 0,
            ImageFormat::BGRA8 => 1,
            ImageFormat::R8 => 2,
            _ => { panic!("unexpected format"); }
        }
    }

    pub fn begin_frame(&mut self) {
        self.current_frame += 1;
    }

    /// Create or reuse a staging texture.
    ///
    /// See also return_texture.
    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {

        // First try to reuse a texture from the pool.
        // "available" here means hasn't been used for 2 frames to avoid stalls.
        // No need to scan the vector. Newer textures are always pushed at the back
        // of the vector so we know the first element is the least recently used.
        let format_idx = self.format_index(format);
        let can_reuse = self.textures[format_idx].get(0)
            .map(|tex| self.current_frame - tex.1 > 2)
            .unwrap_or(false);

        if can_reuse {
            return self.textures[format_idx].pop_front().unwrap().0;
        }

        // If we couldn't find an available texture, create a new one.

        device.create_texture(
            ImageBufferKind::Texture2D,
            format,
            BATCH_UPLOAD_TEXTURE_SIZE.width,
            BATCH_UPLOAD_TEXTURE_SIZE.height,
            TextureFilter::Nearest,
            // Currently we need render target support as we always use glBlitFramebuffer
            // to copy the texture data. Instead, we should use glCopyImageSubData on some
            // platforms, and avoid creating the FBOs in that case.
            Some(RenderTargetInfo { has_depth: false }),
        )
    }

    /// Hand the staging texture back to the pool after being done with uploads.
    ///
    /// The texture must have been obtained from this pool via get_texture.
    pub fn return_texture(&mut self, texture: Texture) {
        let format_idx = self.format_index(texture.get_format());
        self.textures[format_idx].push_back((texture, self.current_frame));
    }

    /// Create or reuse a temporary CPU buffer.
    ///
    /// These buffers are used in the batched upload path when PBOs are not supported.
    /// Content is first written to the temporary buffer and uploaded via a single
    /// glTexSubImage2D call.
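    ///
    /// Callers are expected to pair this with return_temporary_buffer, roughly:
    ///
    /// ```ignore
    /// let bytes = pool.get_temporary_buffer();
    /// // ... fill `bytes` and upload it ...
    /// pool.return_temporary_buffer(bytes);
    /// ```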
    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
        self.used_temporary_buffers += 1;
        self.temporary_buffers.pop().unwrap_or_else(|| {
            vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]
        })
    }

    /// Return memory that was obtained from this pool via get_temporary_buffer.
    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);
        self.temporary_buffers.push(buffer);
    }

    /// Deallocate this pool's CPU and GPU memory.
    pub fn delete_textures(&mut self, device: &mut Device) {
        for format in &mut self.textures {
            while let Some(texture) = format.pop_back() {
                device.delete_texture(texture.0)
            }
        }
        self.temporary_buffers.clear();
    }

    /// Deallocate some textures if there are too many for a long time.
    pub fn end_frame(&mut self, device: &mut Device) {
        for format_idx in 0..self.textures.len() {
            // Count the number of reusable staging textures.
            // If it stays high for a large number of frames, truncate it back to 8-ish
            // over multiple frames.

            let mut num_reusable_textures = 0;
            for texture in &self.textures[format_idx] {
                if self.current_frame - texture.1 > 2 {
                    num_reusable_textures += 1;
                }
            }

            if num_reusable_textures < 8 {
                // Don't deallocate textures for another 120 frames.
                self.delay_texture_deallocation[format_idx] = self.current_frame + 120;
            }

            // Deallocate up to 4 staging textures every frame.
            let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {
                num_reusable_textures.min(4)
            } else {
                0
            };

            for _ in 0..to_remove {
                let texture = self.textures[format_idx].pop_front().unwrap().0;
                device.delete_texture(texture);
            }
        }

        // Similar logic for temporary CPU buffers.
        let unused_buffers = self.temporary_buffers.len() - self.used_temporary_buffers;
        if unused_buffers < 8 {
            self.delay_buffer_deallocation = self.current_frame + 120;
        }
        let to_remove = if self.current_frame > self.delay_buffer_deallocation {
            unused_buffers.min(4)
        } else {
            0
        };
        for _ in 0..to_remove {
            // Unlike textures it doesn't matter whether we pop from the front or back
            // of the vector.
            self.temporary_buffers.pop();
        }
        self.used_temporary_buffers = 0;
    }

    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        for buf in &self.temporary_buffers {
            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };
        }

        for format in &self.textures {
            for texture in format {
                report.upload_staging_textures += texture.0.size_in_bytes();
            }
        }
    }
}

struct UploadStats {
    num_draw_calls: u32,
    upload_time: u64,
    cpu_buffer_alloc_time: u64,
    texture_alloc_time: u64,
    cpu_copy_time: u64,
    gpu_copy_commands_time: u64,
    bytes_uploaded: usize,
}

#[derive(Debug)]
enum StagingBufferKind<'a> {
    Pbo(UploadStagingBuffer<'a>),
    CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> }
}
#[derive(Debug)]
struct BatchUploadBuffer<'a> {
    staging_buffer: StagingBufferKind<'a>,
    texture_index: usize,
    // A rectangle containing all items going into this staging texture, so
    // that we can avoid uploading the entire area if we are using glTexSubImage2d.
    upload_rect: DeviceIntRect,
}

// On some devices performing many small texture uploads is slow, so instead we batch
// updates into a small number of uploads to temporary textures, then copy from those
// textures to the correct place in the texture cache.
// This struct describes a single copy from one of those temporary staging textures
// into the texture cache.
#[derive(Debug)]
struct BatchUploadCopy {
    // Index within batch_upload_textures
    src_texture_index: usize,
    src_offset: DeviceIntPoint,
    dest_texture_id: CacheTextureId,
    dest_offset: DeviceIntPoint,
    size: DeviceIntSize,
}