// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//! This module contains the render task graph.
//!
//! Code associated with creating specific render tasks is in the render_task
//! module.

use api::units::*;
use api::ImageFormat;
use crate::gpu_cache::{GpuCache, GpuCacheAddress};
use crate::internal_types::{TextureSource, CacheTextureId, FastHashMap, FastHashSet};
use crate::render_backend::FrameId;
use crate::render_task::{StaticRenderTaskSurface, RenderTaskLocation, RenderTask};
use crate::render_target::RenderTargetKind;
use crate::render_task::{RenderTaskData, RenderTaskKind};
use crate::resource_cache::ResourceCache;
use crate::texture_pack::GuillotineAllocator;
use crate::prim_store::DeferredResolve;
use crate::image_source::{resolve_image, resolve_cached_render_task};
use crate::util::VecHelper;
use smallvec::SmallVec;
use std::mem;

use crate::render_target::{RenderTargetList, ColorRenderTarget};
use crate::render_target::{PictureCacheTarget, TextureCacheRenderTarget, AlphaRenderTarget};
use crate::util::Allocation;
use std::{usize, f32};

/// According to apitrace, textures larger than 2048 break fast clear
/// optimizations on some intel drivers. We sometimes need to go larger, but
/// we try to avoid it.
const MAX_SHARED_SURFACE_SIZE: i32 = 2048;

/// If we ever need a larger texture than the ideal, we had better round it up
/// to a reasonable number in order to have a bit of leeway in case the size
/// of this target changes each frame.
const TEXTURE_DIMENSION_MASK: i32 = 0xFF;
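//
// For example, a 300x520 task that can't use a shared surface rounds up to
// 512x768: (300 + 0xFF) & !0xFF == 512, and (520 + 0xFF) & !0xFF == 768.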

/// Allows initializing a render task directly into the render task buffer.
///
/// See util::VecHelper. RenderTask is fairly large, so avoiding the move when
/// pushing into the vector can save a lot of expensive memcpys on pages with
/// many render tasks.
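///
/// A minimal usage sketch (mirroring the `task_dynamic` test helper at the
/// bottom of this module; `RenderTask::new_test` is test-only):
///
/// ```ignore
/// let mut gb = RenderTaskGraphBuilder::new();
/// let task_id = gb.add().init(RenderTask::new_test(
///     RenderTaskLocation::Unallocated { size: DeviceIntSize::new(128, 128) },
///     RenderTargetKind::Color,
/// ));
/// ```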
pub struct RenderTaskAllocation<'a> {
    pub alloc: Allocation<'a, RenderTask>,
}

impl<'l> RenderTaskAllocation<'l> {
    #[inline(always)]
    pub fn init(self, value: RenderTask) -> RenderTaskId {
        RenderTaskId {
            index: self.alloc.init(value) as u32,
        }
    }
}

#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
#[derive(MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct RenderTaskId {
    pub index: u32,
}

impl RenderTaskId {
    pub const INVALID: RenderTaskId = RenderTaskId {
        index: u32::MAX,
    };
}

#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq, PartialOrd, Ord)]
pub struct PassId(usize);

impl PassId {
    pub const MIN: PassId = PassId(0);
    pub const MAX: PassId = PassId(!0);
}

/// An internal representation of a dynamic surface that tasks can be
/// allocated into. Maintains some extra metadata about each surface
/// during the graph build.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct Surface {
    /// Whether this is a color or alpha render target
    kind: RenderTargetKind,
    /// Allocator for this surface texture
    allocator: GuillotineAllocator,
    /// We can only allocate into this for reuse if it's a shared surface
    is_shared: bool,
}

impl Surface {
    /// Allocate a rect within a shared surface. Returns None if the
    /// format doesn't match, or allocation fails.
    fn alloc_rect(
        &mut self,
        size: DeviceIntSize,
        kind: RenderTargetKind,
        is_shared: bool,
    ) -> Option<DeviceIntPoint> {
        if self.kind == kind && self.is_shared == is_shared {
            self.allocator
                .allocate(&size)
                .map(|(_slice, origin)| origin)
        } else {
            None
        }
    }
}

/// A sub-pass can draw to either a dynamic (temporary render target) surface,
/// or a persistent surface (texture or picture cache).
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(Debug)]
pub enum SubPassSurface {
    /// A temporary (intermediate) surface.
    Dynamic {
        /// The renderer texture id
        texture_id: CacheTextureId,
        /// Color / alpha render target
        target_kind: RenderTargetKind,
        /// The rectangle occupied by tasks in this surface. Used as a clear
        /// optimization on some GPUs.
        used_rect: DeviceIntRect,
    },
    Persistent {
        /// Reference to the texture or picture cache surface being drawn to.
        surface: StaticRenderTaskSurface,
    },
}

/// A subpass is a specific render target, and a list of tasks to draw to it.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct SubPass {
    /// The surface this subpass draws to
    pub surface: SubPassSurface,
    /// The tasks assigned to this subpass.
    pub task_ids: Vec<RenderTaskId>,
}

/// A pass expresses dependencies between tasks. Each pass consists of a number
/// of subpasses.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct Pass {
    /// The tasks assigned to this render pass
    pub task_ids: Vec<RenderTaskId>,
    /// The subpasses that make up this dependency pass
    pub sub_passes: Vec<SubPass>,
    /// A list of intermediate surfaces that can be invalidated after
    /// this pass completes.
    pub textures_to_invalidate: Vec<CacheTextureId>,
}

/// The RenderTaskGraph is the immutable representation of the render task graph. It is
/// built by the RenderTaskGraphBuilder, and is constructed once per frame.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct RenderTaskGraph {
    /// List of tasks added to the graph
    pub tasks: Vec<RenderTask>,

    /// The passes that were created, based on dependencies between tasks
    pub passes: Vec<Pass>,

    /// Current frame id, used for debug validation
    frame_id: FrameId,

    /// GPU specific data for each task that is made available to shaders
    pub task_data: Vec<RenderTaskData>,

    /// Total number of intermediate surfaces that will be drawn to, used for test validation.
    #[cfg(test)]
    surface_count: usize,

    /// Total number of real allocated textures that will be drawn to, used for test validation.
    #[cfg(test)]
    unique_surfaces: FastHashSet<CacheTextureId>,
}

/// The persistent interface that is used during frame building to construct the
/// frame graph.
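///
/// A hedged sketch of the per-frame lifecycle, based on how the `fg_test_*`
/// tests below drive the builder (names like `root_task` are placeholders):
///
/// ```ignore
/// let mut gb = RenderTaskGraphBuilder::new();
/// gb.begin_frame(frame_id);
/// let root = gb.add().init(root_task);
/// let child = gb.add().init(child_task);
/// // `root` samples from `child` as a texture input.
/// gb.add_dependency(root, child);
/// let graph = gb.end_frame(resource_cache, gpu_cache, &mut deferred_resolves);
/// ```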
pub struct RenderTaskGraphBuilder {
    /// List of tasks added to the builder
    tasks: Vec<RenderTask>,

    /// List of task roots
    roots: FastHashSet<RenderTaskId>,

    /// Input dependencies where the input is a persistent target,
    /// rather than a specific render task id. Useful for expressing
    /// when a task relies on a readback of a surface that is partially
    /// drawn to.
    target_inputs: Vec<(RenderTaskId, StaticRenderTaskSurface)>,

    /// Current frame id, used for debug validation
    frame_id: FrameId,

    /// A list of texture surfaces that can be freed at the end of a pass. Retained
    /// here to reduce heap allocations.
    textures_to_free: FastHashSet<CacheTextureId>,

    // Keep a map of `texture_id` to metadata about surfaces that are currently
    // borrowed from the render target pool.
    active_surfaces: FastHashMap<CacheTextureId, Surface>,

    /// A temporary buffer used by assign_free_pass. Kept here to avoid heap reallocs
    child_task_buffer: Vec<RenderTaskId>,
}

impl RenderTaskGraphBuilder {
    /// Construct a new graph builder. Typically constructed once and maintained
    /// over many frames, to avoid extra heap allocations where possible.
    pub fn new() -> Self {
        RenderTaskGraphBuilder {
            tasks: Vec::new(),
            roots: FastHashSet::default(),
            target_inputs: Vec::new(),
            frame_id: FrameId::INVALID,
            textures_to_free: FastHashSet::default(),
            active_surfaces: FastHashMap::default(),
            child_task_buffer: Vec::new(),
        }
    }

    pub fn frame_id(&self) -> FrameId {
        self.frame_id
    }

    /// Begin a new frame
    pub fn begin_frame(&mut self, frame_id: FrameId) {
        self.frame_id = frame_id;
        self.roots.clear();
    }

    /// Get immutable access to a task
    // TODO(gw): There are only a couple of places where existing code needs to
    //           access a task during the building step. Perhaps we can remove this?
    pub fn get_task(
        &self,
        task_id: RenderTaskId,
    ) -> &RenderTask {
        &self.tasks[task_id.index as usize]
    }

    /// Get mutable access to a task
    // TODO(gw): There are only a couple of places where existing code needs to
    //           access a task during the building step. Perhaps we can remove this?
    pub fn get_task_mut(
        &mut self,
        task_id: RenderTaskId,
    ) -> &mut RenderTask {
        &mut self.tasks[task_id.index as usize]
    }

    /// Add a new task to the graph.
    pub fn add(&mut self) -> RenderTaskAllocation {
        // Assume every task is a root to start with
        self.roots.insert(
            RenderTaskId { index: self.tasks.len() as u32 }
        );

        RenderTaskAllocation {
            alloc: self.tasks.alloc(),
        }
    }

    /// Express a dependency, such that `task_id` depends on `input` as a texture source.
    pub fn add_dependency(
        &mut self,
        task_id: RenderTaskId,
        input: RenderTaskId,
    ) {
        self.tasks[task_id.index as usize].children.push(input);

        // Once a task is an input, it's no longer a root
        self.roots.remove(&input);
    }

    /// Register a persistent surface as an input dependency of a task (readback).
    pub fn add_target_input(
        &mut self,
        task_id: RenderTaskId,
        target: StaticRenderTaskSurface,
    ) {
        self.target_inputs.push((task_id, target));
    }

    /// End the graph building phase and produce the immutable task graph for this frame
    pub fn end_frame(
        &mut self,
        resource_cache: &mut ResourceCache,
        gpu_cache: &mut GpuCache,
        deferred_resolves: &mut Vec<DeferredResolve>,
    ) -> RenderTaskGraph {
        // Copy the render tasks over to the immutable graph output
        let task_count = self.tasks.len();
        let tasks = mem::replace(
            &mut self.tasks,
            Vec::with_capacity(task_count),
        );

        let mut graph = RenderTaskGraph {
            tasks,
            passes: Vec::new(),
            task_data: Vec::with_capacity(task_count),
            frame_id: self.frame_id,
            #[cfg(test)]
            surface_count: 0,
            #[cfg(test)]
            unique_surfaces: FastHashSet::default(),
        };

        // Handle late mapping of dependencies on a specific persistent target.
        // NOTE: This functionality isn't used by current callers of the frame graph, but
        //       will be used in future (for example, to express readbacks of partially
        //       rendered picture tiles for mix-blend-mode etc).
        if !self.target_inputs.is_empty() {
            // Create a mapping from persistent surface id -> render task root (used below):
            let mut roots = FastHashMap::default();
            roots.reserve(self.roots.len());
            for root_id in &self.roots {
                let task = &graph.tasks[root_id.index as usize];
                match task.location {
                    RenderTaskLocation::Static { ref surface, .. } => {
                        // We should never encounter a graph where the same surface is a
                        // render root more than once.
                        assert!(!roots.contains_key(surface));
                        roots.insert(surface.clone(), *root_id);
                    }
                    RenderTaskLocation::Dynamic { .. }
                    | RenderTaskLocation::CacheRequest { .. }
                    | RenderTaskLocation::Unallocated { .. } => {
                        // Intermediate surfaces can't be render roots, they should always
                        // be a dependency of a render root.
                        panic!("bug: invalid root");
                    }
                }
            }
            assert_eq!(roots.len(), self.roots.len());

            // Now resolve those dependencies on persistent targets and add them
            // as a render task dependency.
            for (task_id, target_id) in self.target_inputs.drain(..) {
                match roots.get(&target_id) {
                    Some(root_task_id) => {
                        graph.tasks[task_id.index as usize].children.push(*root_task_id);
                        self.roots.remove(root_task_id);
                    }
                    None => {
                        println!("WARN: {:?} depends on root {:?} but it has no tasks!",
                            task_id,
                            target_id,
                        );
                    }
                }
            }
        }

        // Two traversals of the graph are required. The first traversal determines how
        // many passes are needed, and assigns each render task the pass it is drawn on.
        // The second traversal determines the last time each render task is used as an
        // input, and assigns the pass after which the surface backing that render task
        // can be freed (the surface is then returned to the render target pool and may
        // be aliased / reused during subsequent passes).
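        //
        // A worked example (matching fg_test_4 below): for a dependency chain
        // pc_root <- c0 <- c1 <- c2, the first traversal assigns render_on
        // passes 0, 1, 2 and 3 respectively. The second traversal sets
        // c2.free_after = c1.render_on = PassId(2), so c2's texture returns
        // to the pool once pass 2 is processed and can then be aliased by c0
        // on pass 1.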

        let mut pass_count = 0;

        // Traverse each root, and assign `render_on` for each task and count number of required passes
        for root_id in &self.roots {
            assign_render_pass(
                *root_id,
                PassId(0),
                &mut graph,
                &mut pass_count,
            );
        }

        // Determine the pass on which each task can be freed, which depends on
        // the last task that uses it as an input.
        for i in 0 .. graph.tasks.len() {
            let task_id = RenderTaskId { index: i as u32 };
            assign_free_pass(
                task_id,
                &mut self.child_task_buffer,
                &mut graph,
            );
        }

        // Construct passes array for tasks to be assigned to below
        for _ in 0 .. pass_count+1 {
            graph.passes.push(Pass {
                task_ids: Vec::new(),
                sub_passes: Vec::new(),
                textures_to_invalidate: Vec::new(),
            });
        }

        // Assign tasks to each pass based on their `render_on` attribute
        for (index, task) in graph.tasks.iter().enumerate() {
            if task.kind.is_a_rendering_operation() {
                let id = RenderTaskId { index: index as u32 };
                graph.passes[task.render_on.0].task_ids.push(id);
            }
        }

        // At this point, tasks are assigned to each dependency pass. Now we
        // can go through each pass and create sub-passes, assigning each task
        // to a target and destination rect.
        assert!(self.active_surfaces.is_empty());

        for (pass_id, pass) in graph.passes.iter_mut().enumerate().rev() {
            assert!(self.textures_to_free.is_empty());

            for task_id in &pass.task_ids {
                let task = &mut graph.tasks[task_id.index as usize];

                match task.location {
                    RenderTaskLocation::Unallocated { size } => {
                        let mut location = None;
                        let kind = task.kind.target_kind();

                        // Allow this render task to use a shared surface target if it
                        // is freed straight after this pass. Tasks that must remain
                        // allocated for inputs on subsequent passes are always assigned
                        // to a standalone surface, to simplify lifetime management of
                        // render targets.

                        let can_use_shared_surface =
                            task.render_on == PassId(task.free_after.0 + 1);
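                        // (Passes are processed from the highest index down
                        // toward the roots, so render_on == free_after + 1
                        // means this task is consumed by the very next pass
                        // and its rect never outlives the shared target.)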

                        if can_use_shared_surface {
                            // If we can use a shared surface, step through the existing shared
                            // surfaces for this subpass, and see if we can allocate the task
                            // to one of these targets.
                            for sub_pass in &mut pass.sub_passes {
                                if let SubPassSurface::Dynamic { texture_id, ref mut used_rect, .. } = sub_pass.surface {
                                    let surface = self.active_surfaces.get_mut(&texture_id).unwrap();
                                    if let Some(p) = surface.alloc_rect(size, kind, true) {
                                        location = Some((texture_id, p));
                                        *used_rect = used_rect.union(&DeviceIntRect::from_origin_and_size(p, size));
                                        sub_pass.task_ids.push(*task_id);
                                        break;
                                    }
                                }
                            }
                        }

                        if location.is_none() {
                            // If it wasn't possible to allocate the task to a shared surface, get a new
                            // render target from the resource cache pool.

                            // If this is a really large task, don't bother allocating it as a potential
                            // shared surface for other tasks.

                            let can_use_shared_surface = can_use_shared_surface &&
                                size.width <= MAX_SHARED_SURFACE_SIZE &&
                                size.height <= MAX_SHARED_SURFACE_SIZE;

                            let surface_size = if can_use_shared_surface {
                                DeviceIntSize::new(
                                    MAX_SHARED_SURFACE_SIZE,
                                    MAX_SHARED_SURFACE_SIZE,
                                )
                            } else {
                                // Round up size here to avoid constant re-allocs during resizing
                                DeviceIntSize::new(
                                    (size.width + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
                                    (size.height + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
                                )
                            };

                            let format = match kind {
                                RenderTargetKind::Color => ImageFormat::RGBA8,
                                RenderTargetKind::Alpha => ImageFormat::R8,
                            };

                            // Get render target of appropriate size and format from resource cache
                            let texture_id = resource_cache.get_or_create_render_target_from_pool(
                                surface_size,
                                format,
                            );

                            // Allocate metadata we need about this surface while it's active
                            let mut surface = Surface {
                                kind,
                                allocator: GuillotineAllocator::new(Some(surface_size)),
                                is_shared: can_use_shared_surface,
                            };

                            // Allocation of the task must fit in this new surface!
                            let p = surface.alloc_rect(
                                size,
                                kind,
                                can_use_shared_surface,
                            ).expect("bug: alloc must succeed!");

                            location = Some((texture_id, p));

                            // Store the metadata about this newly active surface. We should never
                            // get a target surface with the same texture_id as a currently active surface.
                            let _prev_surface = self.active_surfaces.insert(texture_id, surface);
                            assert!(_prev_surface.is_none());

                            // Store some information about surface allocations if in test mode
                            #[cfg(test)]
                            {
                                graph.surface_count += 1;
                                graph.unique_surfaces.insert(texture_id);
                            }

                            // Add the target as a new subpass for this render pass.
                            pass.sub_passes.push(SubPass {
                                surface: SubPassSurface::Dynamic {
                                    texture_id,
                                    target_kind: kind,
                                    used_rect: DeviceIntRect::from_origin_and_size(p, size),
                                },
                                task_ids: vec![*task_id],
                            });
                        }

                        // By now, we must have allocated a surface and rect for this task, so assign it!
                        assert!(location.is_some());
                        task.location = RenderTaskLocation::Dynamic {
                            texture_id: location.unwrap().0,
                            rect: DeviceIntRect::from_origin_and_size(location.unwrap().1, size),
                        };
                    }
                    RenderTaskLocation::Static { ref surface, .. } => {
                        // No need to allocate for this surface, since it's a persistent
                        // target. Instead, just create a new sub-pass for it.
                        pass.sub_passes.push(SubPass {
                            surface: SubPassSurface::Persistent {
                                surface: surface.clone(),
                            },
                            task_ids: vec![*task_id],
                        });
                    }
                    RenderTaskLocation::CacheRequest { .. } => {
                        // No need to allocate nor to create a sub-pass for read-only locations.
                    }
                    RenderTaskLocation::Dynamic { .. } => {
                        // Dynamic tasks shouldn't be allocated by this point
                        panic!("bug: encountered an already allocated task");
                    }
                }

                // Return the shared surfaces from this pass
                let task = &graph.tasks[task_id.index as usize];
                for child_id in &task.children {
                    let child_task = &graph.tasks[child_id.index as usize];
                    match child_task.location {
                        RenderTaskLocation::Unallocated { .. } => panic!("bug: must be allocated"),
                        RenderTaskLocation::Dynamic { texture_id, .. } => {
                            // If this task can be freed after this pass, include it in the
                            // unique set of textures to be returned to the render target pool below.
                            if child_task.free_after == PassId(pass_id) {
                                self.textures_to_free.insert(texture_id);
                            }
                        }
                        RenderTaskLocation::Static { .. } => {}
                        RenderTaskLocation::CacheRequest { .. } => {}
                    }
                }
            }

            // Return no longer used textures to the pool, so that they can be reused / aliased
            // by later passes.
            for texture_id in self.textures_to_free.drain() {
                resource_cache.return_render_target_to_pool(texture_id);
                self.active_surfaces.remove(&texture_id).unwrap();
                pass.textures_to_invalidate.push(texture_id);
            }
        }

        // By now, all surfaces that were borrowed from the render target pool must
        // be returned to the resource cache, or we are leaking intermediate surfaces!
        assert!(self.active_surfaces.is_empty());

        // Each task is now allocated to a surface and target rect. Write that to the
        // GPU blocks and task_data. After this point, the graph is returned and is
        // considered to be immutable for the rest of the frame building process.

        for task in &mut graph.tasks {
            // First check whether the render task texture and uv rects are managed
            // externally. This is the case for image tasks and cached tasks. In both
            // cases the information is found in the texture cache.
            let cache_item = if let Some(ref cache_handle) = task.cache_handle {
                Some(resolve_cached_render_task(
                    cache_handle,
                    resource_cache,
                ))
            } else if let RenderTaskKind::Image(request) = &task.kind {
                Some(resolve_image(
                    *request,
                    resource_cache,
                    gpu_cache,
                    deferred_resolves,
                ))
            } else {
                // General case (non-cached non-image tasks).
                None
            };

            if let Some(cache_item) = cache_item {
                // Update the render task even if the item is invalid.
                // We'll handle it later and it's easier to not have to
                // deal with unexpected location variants like
                // RenderTaskLocation::CacheRequest when we do.
                let source = cache_item.texture_id;
                task.uv_rect_handle = cache_item.uv_rect_handle;
                task.location = RenderTaskLocation::Static {
                    surface: StaticRenderTaskSurface::ReadOnly { source },
                    rect: cache_item.uv_rect,
                };
            }
            // Give the render task an opportunity to add any
            // information to the GPU cache, if appropriate.
            let target_rect = task.get_target_rect();

            task.write_gpu_blocks(
                target_rect,
                gpu_cache,
            );

            graph.task_data.push(
                task.kind.write_task_data(target_rect)
            );
        }

        graph
    }
}

impl RenderTaskGraph {
    /// Print the render task graph to console
    #[allow(dead_code)]
    pub fn print(
        &self,
    ) {
        println!("-- RenderTaskGraph --");

        for (i, task) in self.tasks.iter().enumerate() {
            println!("Task {}: render_on={} free_after={} {:?}",
                i,
                task.render_on.0,
                task.free_after.0,
                task.kind.as_str(),
            );
        }

        for (p, pass) in self.passes.iter().enumerate() {
            println!("Pass {}:", p);

            for (s, sub_pass) in pass.sub_passes.iter().enumerate() {
                println!("\tSubPass {}: {:?}",
                    s,
                    sub_pass.surface,
                );

                for task_id in &sub_pass.task_ids {
                    println!("\t\tTask {:?}", task_id.index);
                }
            }
        }
    }

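    /// Resolve a task id (if any) to the texture source and GPU cache address
    /// needed to sample its output, or None if the task has no valid output.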
    pub fn resolve_location(
        &self,
        task_id: impl Into<Option<RenderTaskId>>,
        gpu_cache: &GpuCache,
    ) -> Option<(GpuCacheAddress, TextureSource)> {
        self.resolve_impl(task_id.into()?, gpu_cache)
    }

    fn resolve_impl(
        &self,
        task_id: RenderTaskId,
        gpu_cache: &GpuCache,
    ) -> Option<(GpuCacheAddress, TextureSource)> {
        let task = &self[task_id];
        let texture_source = task.get_texture_source();

        if let TextureSource::Invalid = texture_source {
            return None;
        }

        let uv_address = task.get_texture_address(gpu_cache);

        Some((uv_address, texture_source))
    }


    /// Return the surface and texture counts, used for testing
    #[cfg(test)]
    pub fn surface_counts(&self) -> (usize, usize) {
        (self.surface_count, self.unique_surfaces.len())
    }

    /// Return current frame id, used for validation
    #[cfg(debug_assertions)]
    pub fn frame_id(&self) -> FrameId {
        self.frame_id
    }
}

/// Batching uses index access to read information about tasks
impl std::ops::Index<RenderTaskId> for RenderTaskGraph {
    type Output = RenderTask;
    fn index(&self, id: RenderTaskId) -> &RenderTask {
        &self.tasks[id.index as usize]
    }
}

/// Recursive helper to assign the pass that a task should render on
fn assign_render_pass(
    id: RenderTaskId,
    pass: PassId,
    graph: &mut RenderTaskGraph,
    pass_count: &mut usize,
) {
    let task = &mut graph.tasks[id.index as usize];

    // No point in recursing into paths in the graph if this task has already
    // been set to draw after this pass.
    if task.render_on > pass {
        return;
    }

    let next_pass = if task.kind.is_a_rendering_operation() {
        // Keep count of number of passes needed
        *pass_count = pass.0.max(*pass_count);
        PassId(pass.0 + 1)
    } else {
        // If the node is not a rendering operation, it doesn't create a
        // render pass, so we don't increment the pass count.
        // For now we expect non-rendering nodes to be leaves of the graph.
        // We don't strictly depend on it but it simplifies the mental model.
        debug_assert!(task.children.is_empty());
        pass
    };

    // A task should be rendered on the earliest pass in the dependency
    // graph where it's required. Using max here ensures the correct value
    // in the presence of multiple paths to this task from the root(s).
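    // (In fg_test_5 below, the deepest child is reachable directly from the
    // root, which alone would give PassId(1), and via a chain of three
    // intermediate tasks, which gives PassId(4); max keeps PassId(4).)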
    task.render_on = task.render_on.max(pass);

    // TODO(gw): Work around the borrowck - maybe we could structure the dependencies
    //           storage better, to avoid this?
    let mut child_task_ids: SmallVec<[RenderTaskId; 8]> = SmallVec::new();
    child_task_ids.extend_from_slice(&task.children);

    for child_id in child_task_ids {
        assign_render_pass(
            child_id,
            next_pass,
            graph,
            pass_count,
        );
    }
}

fn assign_free_pass(
    id: RenderTaskId,
    child_task_buffer: &mut Vec<RenderTaskId>,
    graph: &mut RenderTaskGraph,
) {
    let task = &graph.tasks[id.index as usize];
    let render_on = task.render_on;
    debug_assert!(child_task_buffer.is_empty());

    // TODO(gw): Work around the borrowck - maybe we could structure the dependencies
    //           storage better, to avoid this?
    child_task_buffer.extend_from_slice(&task.children);

    for child_id in child_task_buffer.drain(..) {
        let child_task = &mut graph.tasks[child_id.index as usize];

        // Each dynamic child task can free its backing surface after the last
        // task that references it as an input. Using min here ensures the
        // safe time to free this surface in the presence of multiple paths
        // to this task from the root(s).
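        // (Again in fg_test_5, the deepest child is consumed both by an
        // intermediate task and by the root itself, so min keeps its surface
        // alive until the root's pass, the last one to execute.)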
        match child_task.location {
            RenderTaskLocation::CacheRequest { .. } => {}
            RenderTaskLocation::Static { .. } => {
                // Never gets freed anyway, so we can leave it untouched
                // (could validate that they remain at PassId::MIN)
            }
            RenderTaskLocation::Unallocated { .. } => {
                child_task.free_after = child_task.free_after.min(render_on);
            }
            RenderTaskLocation::Dynamic { .. } => {
                panic!("bug: should not be allocated yet");
            }
        }
    }
}

/// A render pass represents a set of rendering operations that don't depend on one
/// another.
///
/// A render pass can have several render targets if there wasn't enough space in one
/// target to do all of the rendering for that pass. See `RenderTargetList`.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct RenderPass {
    /// The subpasses that describe targets being rendered to in this pass
    pub alpha: RenderTargetList<AlphaRenderTarget>,
    pub color: RenderTargetList<ColorRenderTarget>,
    pub texture_cache: FastHashMap<CacheTextureId, TextureCacheRenderTarget>,
    pub picture_cache: Vec<PictureCacheTarget>,
    pub textures_to_invalidate: Vec<CacheTextureId>,
}

impl RenderPass {
    /// Creates an intermediate off-screen pass.
    pub fn new(src: &Pass) -> Self {
        RenderPass {
            color: RenderTargetList::new(
                ImageFormat::RGBA8,
            ),
            alpha: RenderTargetList::new(
                ImageFormat::R8,
            ),
            texture_cache: FastHashMap::default(),
            picture_cache: Vec::new(),
            textures_to_invalidate: src.textures_to_invalidate.clone(),
        }
    }
}

// Dump an SVG visualization of the render graph for debugging purposes
#[cfg(feature = "capture")]
pub fn dump_render_tasks_as_svg(
    render_tasks: &RenderTaskGraph,
    output: &mut dyn std::io::Write,
) -> std::io::Result<()> {
    use svg_fmt::*;

    let node_width = 80.0;
    let node_height = 30.0;
    let vertical_spacing = 8.0;
    let horizontal_spacing = 20.0;
    let margin = 10.0;
    let text_size = 10.0;

    let mut pass_rects = Vec::new();
    let mut nodes = vec![None; render_tasks.tasks.len()];

    let mut x = margin;
    let mut max_y: f32 = 0.0;

    #[derive(Clone)]
    struct Node {
        rect: Rectangle,
        label: Text,
        size: Text,
    }

    for pass in render_tasks.passes.iter().rev() {
        let mut layout = VerticalLayout::new(x, margin, node_width);

        for task_id in &pass.task_ids {
            let task_index = task_id.index as usize;
            let task = &render_tasks.tasks[task_index];

            let rect = layout.push_rectangle(node_height);

            let tx = rect.x + rect.w / 2.0;
            let ty = rect.y + 10.0;

            let label = text(tx, ty, format!("{}", task.kind.as_str()));
            let size = text(tx, ty + 12.0, format!("{:?}", task.location.size()));

            nodes[task_index] = Some(Node { rect, label, size });

            layout.advance(vertical_spacing);
        }

        pass_rects.push(layout.total_rectangle());

        x += node_width + horizontal_spacing;
        max_y = max_y.max(layout.y + margin);
    }

    let mut links = Vec::new();
    for node_index in 0..nodes.len() {
        if nodes[node_index].is_none() {
            continue;
        }

        let task = &render_tasks.tasks[node_index];
        for dep in &task.children {
            let dep_index = dep.index as usize;

            if let (&Some(ref node), &Some(ref dep_node)) = (&nodes[node_index], &nodes[dep_index]) {
                links.push((
                    dep_node.rect.x + dep_node.rect.w,
                    dep_node.rect.y + dep_node.rect.h / 2.0,
                    node.rect.x,
                    node.rect.y + node.rect.h / 2.0,
                ));
            }
        }
    }

    let svg_w = x + margin;
    let svg_h = max_y + margin;
    writeln!(output, "{}", BeginSvg { w: svg_w, h: svg_h })?;

    // Background.
    writeln!(output,
        "    {}",
        rectangle(0.0, 0.0, svg_w, svg_h)
            .inflate(1.0, 1.0)
            .fill(rgb(50, 50, 50))
    )?;

    // Passes.
    for rect in pass_rects {
        writeln!(output,
            "    {}",
            rect.inflate(3.0, 3.0)
                .border_radius(4.0)
                .opacity(0.4)
                .fill(black())
        )?;
    }

    // Links.
    for (x1, y1, x2, y2) in links {
        dump_task_dependency_link(output, x1, y1, x2, y2);
    }

    // Tasks.
    for node in &nodes {
        if let Some(node) = node {
            writeln!(output,
                "    {}",
                node.rect
                    .clone()
                    .fill(black())
                    .border_radius(3.0)
                    .opacity(0.5)
                    .offset(0.0, 2.0)
            )?;
            writeln!(output,
                "    {}",
                node.rect
                    .clone()
                    .fill(rgb(200, 200, 200))
                    .border_radius(3.0)
                    .opacity(0.8)
            )?;

            writeln!(output,
                "    {}",
                node.label
                    .clone()
                    .size(text_size)
                    .align(Align::Center)
                    .color(rgb(50, 50, 50))
            )?;
            writeln!(output,
                "    {}",
                node.size
                    .clone()
                    .size(text_size * 0.7)
                    .align(Align::Center)
                    .color(rgb(50, 50, 50))
            )?;
        }
    }

    writeln!(output, "{}", EndSvg)
}

#[allow(dead_code)]
fn dump_task_dependency_link(
    output: &mut dyn std::io::Write,
    x1: f32, y1: f32,
    x2: f32, y2: f32,
) {
    use svg_fmt::*;

    // If the link is a straight horizontal line and spans over multiple passes, it
    // is likely to go straight through unrelated nodes in a way that makes it look like
    // they are connected, so we bend the line upward a bit to avoid that.
    let simple_path = (y1 - y2).abs() > 1.0 || (x2 - x1) < 45.0;

    let mid_x = (x1 + x2) / 2.0;
    if simple_path {
        write!(output, "    {}",
            path().move_to(x1, y1)
                .cubic_bezier_to(mid_x, y1, mid_x, y2, x2, y2)
                .fill(Fill::None)
                .stroke(Stroke::Color(rgb(100, 100, 100), 3.0))
        ).unwrap();
    } else {
        let ctrl1_x = (mid_x + x1) / 2.0;
        let ctrl2_x = (mid_x + x2) / 2.0;
        let ctrl_y = y1 - 25.0;
        write!(output, "    {}",
            path().move_to(x1, y1)
                .cubic_bezier_to(ctrl1_x, y1, ctrl1_x, ctrl_y, mid_x, ctrl_y)
                .cubic_bezier_to(ctrl2_x, ctrl_y, ctrl2_x, y2, x2, y2)
                .fill(Fill::None)
                .stroke(Stroke::Color(rgb(100, 100, 100), 3.0))
        ).unwrap();
    }
}

/// Construct a picture cache render task location for testing
#[cfg(test)]
fn pc_target(
    surface_id: u64,
    tile_x: i32,
    tile_y: i32,
) -> RenderTaskLocation {
    use crate::{
        composite::{NativeSurfaceId, NativeTileId},
        picture::ResolvedSurfaceTexture,
    };

    let width = 512;
    let height = 512;

    RenderTaskLocation::Static {
        surface: StaticRenderTaskSurface::PictureCache {
            surface: ResolvedSurfaceTexture::Native {
                id: NativeTileId {
                    surface_id: NativeSurfaceId(surface_id),
                    x: tile_x,
                    y: tile_y,
                },
                size: DeviceIntSize::new(width, height),
            },
        },
        rect: DeviceIntSize::new(width, height).into(),
    }
}

#[cfg(test)]
impl RenderTaskGraphBuilder {
    fn test_expect(
        mut self,
        pass_count: usize,
        total_surface_count: usize,
        unique_surfaces: &[(i32, i32, ImageFormat)],
    ) {
        use crate::render_backend::FrameStamp;
        use api::{DocumentId, IdNamespace};

        let mut rc = ResourceCache::new_for_testing();
        let mut gc = GpuCache::new();

        let mut frame_stamp = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
        frame_stamp.advance();
        gc.prepare_for_frames();
        gc.begin_frame(frame_stamp);

        let g = self.end_frame(&mut rc, &mut gc, &mut Vec::new());
        g.print();

        assert_eq!(g.passes.len(), pass_count);
        assert_eq!(g.surface_counts(), (total_surface_count, unique_surfaces.len()));

        rc.validate_surfaces(unique_surfaces);
    }
}

/// Construct a testing render task with given location
#[cfg(test)]
fn task_location(location: RenderTaskLocation) -> RenderTask {
    RenderTask::new_test(
        location,
        RenderTargetKind::Color,
    )
}

/// Construct a dynamic render task location for testing
#[cfg(test)]
fn task_dynamic(size: i32) -> RenderTask {
    RenderTask::new_test(
        RenderTaskLocation::Unallocated { size: DeviceIntSize::new(size, size) },
        RenderTargetKind::Color,
    )
}

#[test]
fn fg_test_1() {
    // Test that a root target can be used as an input for readbacks.
    // This functionality isn't currently used, but will be in future.

    let mut gb = RenderTaskGraphBuilder::new();

    let root_target = pc_target(0, 0, 0);

    let root = gb.add().init(task_location(root_target.clone()));

    let readback = gb.add().init(task_dynamic(100));
    gb.add_dependency(readback, root);

    let mix_blend_content = gb.add().init(task_dynamic(50));

    let content = gb.add().init(task_location(root_target));
    gb.add_dependency(content, readback);
    gb.add_dependency(content, mix_blend_content);

    gb.test_expect(3, 1, &[
        (2048, 2048, ImageFormat::RGBA8),
    ]);
}

#[test]
fn fg_test_2() {
    // Test that texture cache tasks can be added and scheduled correctly as inputs
    // to picture cache tasks. Ensure that no dynamic surfaces are allocated from the
    // target pool in this case.

    let mut gb = RenderTaskGraphBuilder::new();

    let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));

    let tc_0 = StaticRenderTaskSurface::TextureCache {
        texture: CacheTextureId(0),
        target_kind: RenderTargetKind::Color,
    };

    let tc_1 = StaticRenderTaskSurface::TextureCache {
        texture: CacheTextureId(1),
        target_kind: RenderTargetKind::Color,
    };

    gb.add_target_input(
        pc_root,
        tc_0.clone(),
    );

    gb.add_target_input(
        pc_root,
        tc_1.clone(),
    );

    gb.add().init(
        task_location(RenderTaskLocation::Static { surface: tc_0.clone(), rect: DeviceIntSize::new(128, 128).into() }),
    );

    gb.add().init(
        task_location(RenderTaskLocation::Static { surface: tc_1.clone(), rect: DeviceIntSize::new(128, 128).into() }),
    );

    gb.test_expect(2, 0, &[]);
}

#[test]
fn fg_test_3() {
    // Test that small targets are allocated in a shared surface, and that large
    // tasks are allocated in a rounded-up texture size.

    let mut gb = RenderTaskGraphBuilder::new();

    let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));

    let child_pic_0 = gb.add().init(task_dynamic(128));
    let child_pic_1 = gb.add().init(task_dynamic(3000));

    gb.add_dependency(pc_root, child_pic_0);
    gb.add_dependency(pc_root, child_pic_1);

    gb.test_expect(2, 2, &[
        (2048, 2048, ImageFormat::RGBA8),
        (3072, 3072, ImageFormat::RGBA8),
    ]);
}

#[test]
fn fg_test_4() {
    // Test that, for a simple dependency chain of tasks, render target
    // surfaces are aliased and reused between passes where possible.

    let mut gb = RenderTaskGraphBuilder::new();

    let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));

    let child_pic_0 = gb.add().init(task_dynamic(128));
    let child_pic_1 = gb.add().init(task_dynamic(128));
    let child_pic_2 = gb.add().init(task_dynamic(128));

    gb.add_dependency(pc_root, child_pic_0);
    gb.add_dependency(child_pic_0, child_pic_1);
    gb.add_dependency(child_pic_1, child_pic_2);

    gb.test_expect(4, 3, &[
        (2048, 2048, ImageFormat::RGBA8),
        (2048, 2048, ImageFormat::RGBA8),
    ]);
}

#[test]
fn fg_test_5() {
    // Test that a task that is used as an input both by its direct parent and
    // by a distant ancestor is scheduled correctly, and allocates the correct
    // number of passes, taking advantage of surface reuse / aliasing where feasible.

    let mut gb = RenderTaskGraphBuilder::new();

    let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));

    let child_pic_0 = gb.add().init(task_dynamic(128));
    let child_pic_1 = gb.add().init(task_dynamic(64));
    let child_pic_2 = gb.add().init(task_dynamic(32));
    let child_pic_3 = gb.add().init(task_dynamic(16));

    gb.add_dependency(pc_root, child_pic_0);
    gb.add_dependency(child_pic_0, child_pic_1);
    gb.add_dependency(child_pic_1, child_pic_2);
    gb.add_dependency(child_pic_2, child_pic_3);
    gb.add_dependency(pc_root, child_pic_3);

    gb.test_expect(5, 4, &[
        (256, 256, ImageFormat::RGBA8),
        (2048, 2048, ImageFormat::RGBA8),
        (2048, 2048, ImageFormat::RGBA8),
    ]);
}

#[test]
fn fg_test_6() {
    // Test that a task that is used as an input dependency by two parent
    // tasks is correctly allocated and freed.

    let mut gb = RenderTaskGraphBuilder::new();

    let pc_root_1 = gb.add().init(task_location(pc_target(0, 0, 0)));
    let pc_root_2 = gb.add().init(task_location(pc_target(0, 1, 0)));

    let child_pic = gb.add().init(task_dynamic(128));

    gb.add_dependency(pc_root_1, child_pic);
    gb.add_dependency(pc_root_2, child_pic);

    gb.test_expect(2, 1, &[
        (2048, 2048, ImageFormat::RGBA8),
    ]);
}

#[test]
fn fg_test_7() {
    // Test that a standalone surface is not incorrectly used to
    // allocate subsequent shared task rects.

    let mut gb = RenderTaskGraphBuilder::new();

    let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));

    let child0 = gb.add().init(task_dynamic(16));
    let child1 = gb.add().init(task_dynamic(16));

    let child2 = gb.add().init(task_dynamic(16));
    let child3 = gb.add().init(task_dynamic(16));

    gb.add_dependency(pc_root, child0);
    gb.add_dependency(child0, child1);
    gb.add_dependency(pc_root, child1);

    gb.add_dependency(pc_root, child2);
    gb.add_dependency(child2, child3);

    gb.test_expect(3, 3, &[
        (256, 256, ImageFormat::RGBA8),
        (2048, 2048, ImageFormat::RGBA8),
        (2048, 2048, ImageFormat::RGBA8),
    ]);
}