/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use std::{cmp, mem};
use api::units::*;
use malloc_size_of::MallocSizeOfOps;
use crate::{
    device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO},
    gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList},
    internal_types::{FrameId, RenderTargetInfo, Swizzle},
    prim_store::DeferredResolve,
    profiler,
    render_api::MemoryReport,
};

/// Enabling this toggle forces the GPU cache scattered texture to be
/// resized every frame, which allows GPU debuggers to verify that the
/// resize path is handled correctly.
const GPU_CACHE_RESIZE_TEST: bool = false;

/// Tracks the state of each row in the GPU cache texture.
struct CacheRow {
    /// Mirrored block data on CPU for this row. We store a copy of
    /// the data on the CPU side to improve upload batching.
    cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>,
    /// The first offset in this row that is dirty.
    min_dirty: u16,
    /// The last offset in this row that is dirty.
    max_dirty: u16,
}

impl CacheRow {
    fn new() -> Self {
        CacheRow {
            cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]),
            min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as _,
            max_dirty: 0,
        }
    }

    fn is_dirty(&self) -> bool {
        self.min_dirty < self.max_dirty
    }

    fn clear_dirty(&mut self) {
        self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as _;
        self.max_dirty = 0;
    }

    fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
        self.min_dirty = self.min_dirty.min(block_offset as _);
        self.max_dirty = self.max_dirty.max((block_offset + block_count) as _);
    }

    fn dirty_blocks(&self) -> &[GpuBlockData] {
        &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize]
    }
}
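
// A minimal sketch of the dirty-range bookkeeping above: `add_dirty` only
// ever widens a single `[min_dirty, max_dirty)` interval, so two disjoint
// dirty spans collapse into their bounding interval and a single upload
// covers both, at the cost of re-uploading the clean blocks in between.
#[cfg(test)]
mod cache_row_tests {
    use super::*;

    #[test]
    fn dirty_range_is_a_bounding_interval() {
        let mut row = CacheRow::new();
        assert!(!row.is_dirty());

        // Mark two disjoint spans dirty: blocks 4..6 and 10..13.
        row.add_dirty(4, 2);
        row.add_dirty(10, 3);

        // The tracked range is the bounding interval 4..13.
        assert_eq!((row.min_dirty, row.max_dirty), (4, 13));
        assert_eq!(row.dirty_blocks().len(), 9);

        row.clear_dirty();
        assert!(!row.is_dirty());
    }
}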

/// The bus over which CPU and GPU versions of the GPU cache
/// get synchronized.
enum GpuCacheBus {
    /// PBO-based updates, which currently operate at row granularity and
    /// are therefore subject to fragmentation issues.
    PixelBuffer {
        /// Per-row data.
        rows: Vec<CacheRow>,
    },
    /// Shader-based scattering updates, currently rendered as a set of
    /// points into the GPU texture, each carrying a `GpuBlockData`.
    Scatter {
        /// Special program to run the scattered update.
        program: Program,
        /// VAO containing the source vertex buffers.
        vao: CustomVAO,
        /// VBO for positional data, supplied as normalized `u16`.
        buf_position: VBO<[u16; 2]>,
        /// VBO for gpu block data.
        buf_value: VBO<GpuBlockData>,
        /// Currently stored block count.
        count: usize,
    },
}
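
// How the two buses are driven (see `update` and `flush` below): with
// `PixelBuffer`, `update` patches the CPU-side row mirror and widens each
// row's dirty range, and `flush` uploads the dirty spans through a PBO.
// With `Scatter`, `update` appends one point per block to the VBOs, and
// `flush` draws those points into the cache texture.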

/// The device-specific representation of the cache texture in gpu_cache.rs
pub struct GpuCacheTexture {
    texture: Option<Texture>,
    bus: GpuCacheBus,
}

impl GpuCacheTexture {
    /// Ensures that we have an appropriately-sized texture.
    fn ensure_texture(&mut self, device: &mut Device, height: i32) {
        // If we already have a texture that works, we're done.
        if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
            if GPU_CACHE_RESIZE_TEST {
                // Special debug mode - resize the texture even though it's fine.
            } else {
                return;
            }
        }

        // Take the old texture, if any.
        let blit_source = self.texture.take();

        // Create the new texture.
        assert!(height >= 2, "Height is too small for ANGLE");
        let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height);
        // GpuCacheBus::Scatter always requires the texture to be a render target. For
        // GpuCacheBus::PixelBuffer, we only create the texture with a render target if
        // RGBAF32 render targets are actually supported, and only if glCopyImageSubData
        // is not. glCopyImageSubData does not require a render target to copy the texture
        // data, and if neither RGBAF32 render targets nor glCopyImageSubData is supported,
        // we simply re-upload the entire contents rather than copying upon resize.
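        //
        // Summarized as a decision table for the PixelBuffer bus:
        //
        //   copy_image_sub_data | color_buffer_float | render target | resize strategy
        //   --------------------+--------------------+---------------+----------------------
        //   supported           | either             | no            | glCopyImageSubData
        //   unsupported         | supported          | yes           | copy between targets
        //   unsupported         | unsupported        | no            | full re-upload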
        let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
        let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float;
        let rt_info = if matches!(self.bus, GpuCacheBus::PixelBuffer { .. })
            && (supports_copy_image_sub_data || !supports_color_buffer_float)
        {
            None
        } else {
            Some(RenderTargetInfo { has_depth: false })
        };
        let mut texture = device.create_texture(
            api::ImageBufferKind::Texture2D,
            api::ImageFormat::RGBAF32,
            new_size.width,
            new_size.height,
            TextureFilter::Nearest,
            rt_info,
        );

        // Copy the contents of the previous texture, if applicable.
        if let Some(blit_source) = blit_source {
            if !supports_copy_image_sub_data && !supports_color_buffer_float {
                // Cannot copy the texture, so we must re-upload everything.
                match self.bus {
                    GpuCacheBus::PixelBuffer { ref mut rows } => {
                        for row in rows {
                            row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                        }
                    }
                    GpuCacheBus::Scatter { .. } => {
                        panic!("Texture must be copyable to use scatter GPU cache bus method");
                    }
                }
            } else {
                device.copy_entire_texture(&mut texture, &blit_source);
            }
            device.delete_texture(blit_source);
        }

        self.texture = Some(texture);
    }
    pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> {
        use super::desc::GPU_CACHE_UPDATE;

        let bus = if use_scatter {
            assert!(
                device.get_capabilities().supports_color_buffer_float,
                "GpuCache scatter method requires EXT_color_buffer_float",
            );
            let program = device.create_program_linked(
                "gpu_cache_update",
                &[],
                &GPU_CACHE_UPDATE,
            )?;
            let buf_position = device.create_vbo();
            let buf_value = device.create_vbo();
            // Note: the vertex attributes have to be supplied in the same order
            // as for program creation, but each assigned to a different stream.
            let vao = device.create_custom_vao(&[
                buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                buf_value   .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]),
            ]);
            GpuCacheBus::Scatter {
                program,
                vao,
                buf_position,
                buf_value,
                count: 0,
            }
        } else {
            GpuCacheBus::PixelBuffer {
                rows: Vec::new(),
            }
        };

        Ok(GpuCacheTexture {
            texture: None,
            bus,
        })
    }

    pub fn deinit(mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
        if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus {
            device.delete_program(program);
            device.delete_custom_vao(vao);
            device.delete_vbo(buf_position);
            device.delete_vbo(buf_value);
        }
    }

    pub fn get_height(&self) -> i32 {
        self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
    }

    #[cfg(feature = "capture")]
    pub fn get_texture(&self) -> &Texture {
        self.texture.as_ref().unwrap()
    }

    fn prepare_for_updates(
        &mut self,
        device: &mut Device,
        total_block_count: usize,
        max_height: i32,
    ) {
        self.ensure_texture(device, max_height);
        match self.bus {
            GpuCacheBus::PixelBuffer { .. } => {},
            GpuCacheBus::Scatter {
                ref mut buf_position,
                ref mut buf_value,
                ref mut count,
                ..
            } => {
                *count = 0;
                if total_block_count > buf_value.allocated_count() {
                    device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT);
                    device.allocate_vbo(buf_value,    total_block_count, super::ONE_TIME_USAGE_HINT);
                }
            }
        }
    }

    pub fn invalidate(&mut self) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                info!("Invalidating GPU caches");
                for row in rows {
                    row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
                }
            }
            GpuCacheBus::Scatter { .. } => {
                warn!("Unable to invalidate scattered GPU cache");
            }
        }
    }

    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            let row = address.v as usize;

                            // Ensure that the CPU-side shadow copy of the GPU cache data has enough
                            // rows to apply this patch.
                            while rows.len() <= row {
                                // Add a new row.
                                rows.push(CacheRow::new());
                            }

                            // Copy the blocks from the patch array into the shadow CPU copy.
                            let block_offset = address.u as usize;
                            let data = &mut rows[row].cpu_blocks;
                            for i in 0 .. block_count {
                                data[block_offset + i] = updates.blocks[block_index + i];
                            }

                            // This row is dirty (needs to be updated in GPU texture).
                            rows[row].add_dirty(block_offset, block_count);
                        }
                    }
                }
            }
            GpuCacheBus::Scatter {
                ref buf_position,
                ref buf_value,
                ref mut count,
                ..
            } => {
                // TODO: re-use this heap allocation.
                // Unused positions will be left as 0xFFFF, which translates to
                // (1.0, 1.0) in the vertex output position and gets culled out.
                let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
                let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();

                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            // Convert the absolute texel position into normalized coordinates.
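                            // Worked example: for a 1024x512 texture, block
                            // (u=0, v=3) maps to x = (1 << 15) / 1024 = 32 and
                            // y = (7 << 15) / 512 = 448. Read back as normalized
                            // u16 attributes, these are 32 / 65535 ≈ 0.5 / 1024
                            // and 448 / 65535 ≈ 3.5 / 512, i.e. (approximately)
                            // the texel centers of the target cells.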
                            let y = ((2 * address.v as usize + 1) << 15) / size.height;
                            for i in 0 .. block_count {
                                let x = ((2 * address.u as usize + 2 * i + 1) << 15) / size.width;
                                position_data[block_index + i] = [x as _, y as _];
                            }
                        }
                    }
                }

                device.fill_vbo(buf_value, &updates.blocks, *count);
                device.fill_vbo(buf_position, &position_data, *count);
                *count += position_data.len();
            }
        }
    }

    fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize {
        let texture = self.texture.as_ref().unwrap();
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows } => {
                let rows_dirty = rows
                    .iter()
                    .filter(|row| row.is_dirty())
                    .count();
                if rows_dirty == 0 {
                    return 0;
                }

                let mut uploader = device.upload_texture(pbo_pool);

                for (row_index, row) in rows.iter_mut().enumerate() {
                    if !row.is_dirty() {
                        continue;
                    }

                    let blocks = row.dirty_blocks();
                    let rect = DeviceIntRect::from_origin_and_size(
                        DeviceIntPoint::new(row.min_dirty as i32, row_index as i32),
                        DeviceIntSize::new(blocks.len() as i32, 1),
                    );

                    uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len());

                    row.clear_dirty();
                }

                uploader.flush(device);

                rows_dirty
            }
            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
                device.disable_depth();
                device.set_blend(false);
                device.bind_program(program);
                device.bind_custom_vao(vao);
                device.bind_draw_target(
                    DrawTarget::from_texture(
                        texture,
                        false,
                    ),
                );
                device.draw_nonindexed_points(0, count as _);
                0
            }
        }
    }

    #[cfg(feature = "replay")]
    pub fn remove_texture(&mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
    }

    #[cfg(feature = "replay")]
    pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) {
        assert!(self.texture.is_none());
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                let dim = texture.get_dimensions();
                let blocks = unsafe {
                    std::slice::from_raw_parts(
                        data.as_ptr() as *const GpuBlockData,
                        data.len() / mem::size_of::<GpuBlockData>(),
                    )
                };
                // Fill up the CPU cache from the contents we just loaded.
                rows.clear();
                rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
                let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH);
                debug_assert_eq!(chunks.len(), rows.len());
                for (row, chunk) in rows.iter_mut().zip(chunks) {
                    row.cpu_blocks.copy_from_slice(chunk);
                }
            }
            GpuCacheBus::Scatter { .. } => {}
        }
        self.texture = Some(texture);
    }

    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
        if let GpuCacheBus::PixelBuffer { ref rows, .. } = self.bus {
            for row in rows.iter() {
                report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _) };
            }
        }

        // GPU cache GPU memory.
        report.gpu_cache_textures +=
            self.texture.as_ref().map_or(0, |t| t.size_in_bytes());
    }
}

impl super::Renderer {
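    /// Applies all pending GPU cache update lists. The flow has three phases:
    /// `prepare_for_updates` sizes the texture (and, for the scatter bus, the
    /// VBOs), `update` stages each list (patching the CPU mirror or filling
    /// the VBOs), and `flush` performs the actual GPU work (PBO uploads or a
    /// point draw).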
    pub fn update_gpu_cache(&mut self) {
        let _gm = self.gpu_profiler.start_marker("gpu cache update");

        // For an artificial stress test of GPU cache resizing,
        // always pass an extra update list with at least one block in it.
        let gpu_cache_height = self.gpu_cache_texture.get_height();
        if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
            self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
                frame_id: FrameId::INVALID,
                clear: false,
                height: gpu_cache_height,
                blocks: vec![[1f32; 4].into()],
                updates: Vec::new(),
                debug_commands: Vec::new(),
            });
        }

        let (updated_blocks, max_requested_height) = self
            .pending_gpu_cache_updates
            .iter()
            .fold((0, gpu_cache_height), |(count, height), list| {
                (count + list.blocks.len(), cmp::max(height, list.height))
            });

        if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
            self.gpu_cache_overflow = true;
            self.renderer_errors.push(super::RendererError::MaxTextureSize);
        }

        // Note: if we decide to switch to the scatter-style GPU cache update
        // permanently, we could make this code nicer with a `BufferUploader`
        // kind of helper, similar to how the `TextureUploader` API is used.
        self.gpu_cache_texture.prepare_for_updates(
            &mut self.device,
            updated_blocks,
            max_requested_height,
        );

        for update_list in self.pending_gpu_cache_updates.drain(..) {
            assert!(update_list.height <= max_requested_height);
            if update_list.frame_id > self.gpu_cache_frame_id {
                self.gpu_cache_frame_id = update_list.frame_id;
            }
            self.gpu_cache_texture
                .update(&mut self.device, &update_list);
        }

        self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME);
        let updated_rows = self.gpu_cache_texture.flush(
            &mut self.device,
            &mut self.texture_upload_pbo_pool,
        );
        self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME);

        self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows);
        self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks);
    }

    pub fn prepare_gpu_cache(
        &mut self,
        deferred_resolves: &[DeferredResolve],
    ) -> Result<(), super::RendererError> {
        if self.pending_gpu_cache_clear {
            let use_scatter =
                matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
            let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter)?;
            let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
            old_cache.deinit(&mut self.device);
            self.pending_gpu_cache_clear = false;
        }

        let deferred_update_list = self.update_deferred_resolves(deferred_resolves);
        self.pending_gpu_cache_updates.extend(deferred_update_list);

        self.update_gpu_cache();

        // Note: the texture might have changed during the `update`,
        // so we need to bind it here.
        self.device.bind_texture(
            super::TextureSampler::GpuCache,
            self.gpu_cache_texture.texture.as_ref().unwrap(),
            Swizzle::default(),
        );

        Ok(())
    }

    pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
        let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
        let size = device_size_as_framebuffer_size(texture.get_dimensions());
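        // RGBAF32 is 4 channels of 4 bytes each, i.e. 16 bytes per texel.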
        let mut texels = vec![0; (size.width * size.height * 16) as usize];
        self.device.begin_frame();
        self.device.bind_read_target(ReadTarget::from_texture(texture));
        self.device.read_pixels_into(
            size.into(),
            api::ImageFormat::RGBAF32,
            &mut texels,
        );
        self.device.reset_read_target();
        self.device.end_frame();
        (texture.get_dimensions(), texels)
    }
}