1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * Copyright 2016, Blender Foundation.
17  */
18 
19 /** \file
20  * \ingroup draw
21  */
22 
23 /* Private functions / structs of the draw manager */
24 
25 #pragma once
26 
27 #include "DRW_engine.h"
28 #include "DRW_render.h"
29 
30 #include "BLI_assert.h"
31 #include "BLI_linklist.h"
32 #include "BLI_memblock.h"
33 #include "BLI_task.h"
34 #include "BLI_threads.h"
35 
36 #include "GPU_batch.h"
37 #include "GPU_context.h"
38 #include "GPU_drawlist.h"
39 #include "GPU_framebuffer.h"
40 #include "GPU_shader.h"
41 #include "GPU_uniform_buffer.h"
42 #include "GPU_viewport.h"
43 
44 #include "draw_instance_data.h"
45 
/** Let the draw manager call GPU_select, see: #DRW_draw_select_loop. */
#define USE_GPU_SELECT

/** Batch draw-calls together using instanced rendering. */
#define USE_BATCHING 1

/* Debugging switches. */
// #define DRW_DEBUG_CULLING
#define DRW_DEBUG_USE_UNIFORM_NAME 0
/* NOTE(review): presumably the name buffer length that goes with
 * DRW_DEBUG_USE_UNIFORM_NAME -- confirm against the users of this define. */
#define DRW_UNIFORM_BUFFER_NAME 64
55 
56 /* ------------ Profiling --------------- */
57 
58 #define USE_PROFILE
59 
60 #ifdef USE_PROFILE
61 #  include "PIL_time.h"
62 
63 #  define PROFILE_TIMER_FALLOFF 0.04
64 
65 #  define PROFILE_START(time_start) \
66     double time_start = PIL_check_seconds_timer(); \
67     ((void)0)
68 
69 #  define PROFILE_END_ACCUM(time_accum, time_start) \
70     { \
71       time_accum += (PIL_check_seconds_timer() - time_start) * 1e3; \
72     } \
73     ((void)0)
74 
75 /* exp average */
76 #  define PROFILE_END_UPDATE(time_update, time_start) \
77     { \
78       double _time_delta = (PIL_check_seconds_timer() - time_start) * 1e3; \
79       time_update = (time_update * (1.0 - PROFILE_TIMER_FALLOFF)) + \
80                     (_time_delta * PROFILE_TIMER_FALLOFF); \
81     } \
82     ((void)0)
83 
84 #else /* USE_PROFILE */
85 
86 #  define PROFILE_START(time_start) ((void)0)
87 #  define PROFILE_END_ACCUM(time_accum, time_start) ((void)0)
88 #  define PROFILE_END_UPDATE(time_update, time_start) ((void)0)
89 
90 #endif /* USE_PROFILE */
91 
92 /* ------------ Data Structure --------------- */
93 /**
94  * Data structure containing all drawcalls organized by passes and materials.
95  * DRWPass > DRWShadingGroup > DRWCall > DRWCallState
96  *                           > DRWUniform
97  */
98 
99 typedef struct DRWCullingState {
100   uint32_t mask;
101   /* Culling: Using Bounding Sphere for now for faster culling.
102    * Not ideal for planes. Could be extended. */
103   BoundSphere bsphere;
104   /* Grrr only used by EEVEE. */
105   void *user_data;
106 } DRWCullingState;
107 
/**
 * Number of resources stored in one chunk.
 *
 * The minimum guaranteed maximum UBO size is 64KiB. The largest UBO struct is
 * used to compute how many elements fit into it:
 * `((1 << 16) / sizeof(DRWObjectMatrix)) = 512`
 *
 * Keep in sync with `common_view_lib.glsl`.
 */
#define DRW_RESOURCE_CHUNK_LEN 512
113 
/**
 * Sorting key for similar draw-calls, which doubles as a reference
 * to an element stored inside a memory block.
 *
 * Bit layout, from MSB to LSB:
 * - 1 bit: negative scale flag.
 * - 22 bits: chunk id.
 * - 9 bits: resource id inside the chunk (can go up to 511).
 *
 * |-|----------------------|---------|
 *
 * Manual bit-shifts and masks are used rather than bit-fields: bit-field
 * member ordering is compiler dependent and would change the sorting order.
 */
typedef uint32_t DRWResourceHandle;
129 
DRW_handle_negative_scale_get(const DRWResourceHandle * handle)130 BLI_INLINE uint32_t DRW_handle_negative_scale_get(const DRWResourceHandle *handle)
131 {
132   return (*handle & 0x80000000) != 0;
133 }
134 
DRW_handle_chunk_get(const DRWResourceHandle * handle)135 BLI_INLINE uint32_t DRW_handle_chunk_get(const DRWResourceHandle *handle)
136 {
137   return (*handle & 0x7FFFFFFF) >> 9;
138 }
139 
DRW_handle_id_get(const DRWResourceHandle * handle)140 BLI_INLINE uint32_t DRW_handle_id_get(const DRWResourceHandle *handle)
141 {
142   return (*handle & 0x000001FF);
143 }
144 
DRW_handle_increment(DRWResourceHandle * handle)145 BLI_INLINE void DRW_handle_increment(DRWResourceHandle *handle)
146 {
147   *handle += 1;
148 }
149 
DRW_handle_negative_scale_enable(DRWResourceHandle * handle)150 BLI_INLINE void DRW_handle_negative_scale_enable(DRWResourceHandle *handle)
151 {
152   *handle |= 0x80000000;
153 }
154 
DRW_memblock_elem_from_handle(struct BLI_memblock * memblock,const DRWResourceHandle * handle)155 BLI_INLINE void *DRW_memblock_elem_from_handle(struct BLI_memblock *memblock,
156                                                const DRWResourceHandle *handle)
157 {
158   int elem = DRW_handle_id_get(handle);
159   int chunk = DRW_handle_chunk_get(handle);
160   return BLI_memblock_elem_get(memblock, chunk, elem);
161 }
162 
/** Per-object matrices (128 bytes: two 4x4 float matrices). */
typedef struct DRWObjectMatrix {
  /** Model matrix. */
  float model[4][4];
  /** Inverse of the model matrix. */
  float modelinverse[4][4];
} DRWObjectMatrix;
167 
/** Per-object information. */
typedef struct DRWObjectInfos {
  float orcotexfac[2][4];
  float ob_color[4];
  float ob_index;
  /** UNUSED. */
  float pad;
  float ob_random;
  /** The sign is the negative scaling. */
  float ob_flag;
} DRWObjectInfos;
176 
177 BLI_STATIC_ASSERT_ALIGN(DRWObjectMatrix, 16)
178 BLI_STATIC_ASSERT_ALIGN(DRWObjectInfos, 16)
179 
/** Command identifiers. Every value needs to fit in 4 bits. */
typedef enum {
  /* -- Draw Commands -- */

  /** Only sortable type. Must be 0. */
  DRW_CMD_DRAW = 0,
  DRW_CMD_DRAW_RANGE = 1,
  DRW_CMD_DRAW_INSTANCE = 2,
  DRW_CMD_DRAW_INSTANCE_RANGE = 3,
  DRW_CMD_DRAW_PROCEDURAL = 4,

  /* -- Other Commands -- */

  DRW_CMD_CLEAR = 12,
  DRW_CMD_DRWSTATE = 13,
  DRW_CMD_STENCIL = 14,
  DRW_CMD_SELECTID = 15,
} eDRWCommandType;

/** Highest value among the draw commands of #eDRWCommandType. */
#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_PROCEDURAL
196 
197 typedef struct DRWCommandDraw {
198   GPUBatch *batch;
199   DRWResourceHandle handle;
200 } DRWCommandDraw;
201 
202 /* Assume DRWResourceHandle to be 0. */
203 typedef struct DRWCommandDrawRange {
204   GPUBatch *batch;
205   DRWResourceHandle handle;
206   uint vert_first;
207   uint vert_count;
208 } DRWCommandDrawRange;
209 
210 typedef struct DRWCommandDrawInstance {
211   GPUBatch *batch;
212   DRWResourceHandle handle;
213   uint inst_count;
214   uint use_attrs; /* bool */
215 } DRWCommandDrawInstance;
216 
217 typedef struct DRWCommandDrawInstanceRange {
218   GPUBatch *batch;
219   DRWResourceHandle handle;
220   uint inst_first;
221   uint inst_count;
222 } DRWCommandDrawInstanceRange;
223 
224 typedef struct DRWCommandDrawProcedural {
225   GPUBatch *batch;
226   DRWResourceHandle handle;
227   uint vert_count;
228 } DRWCommandDrawProcedural;
229 
230 typedef struct DRWCommandSetMutableState {
231   /** State changes (or'd or and'd with the pass's state) */
232   DRWState enable;
233   DRWState disable;
234 } DRWCommandSetMutableState;
235 
236 typedef struct DRWCommandSetStencil {
237   uint write_mask;
238   uint comp_mask;
239   uint ref;
240 } DRWCommandSetStencil;
241 
242 typedef struct DRWCommandSetSelectID {
243   GPUVertBuf *select_buf;
244   uint select_id;
245 } DRWCommandSetSelectID;
246 
247 typedef struct DRWCommandClear {
248   eGPUFrameBufferBits clear_channels;
249   uchar r, g, b, a; /* [0..1] for each channels. Normalized. */
250   float depth;      /* [0..1] for depth. Normalized. */
251   uchar stencil;    /* Stencil value [0..255] */
252 } DRWCommandClear;
253 
254 typedef union DRWCommand {
255   DRWCommandDraw draw;
256   DRWCommandDrawRange range;
257   DRWCommandDrawInstance instance;
258   DRWCommandDrawInstanceRange instance_range;
259   DRWCommandDrawProcedural procedural;
260   DRWCommandSetMutableState state;
261   DRWCommandSetStencil stencil;
262   DRWCommandSetSelectID select_id;
263   DRWCommandClear clear;
264 } DRWCommand;
265 
266 /* Used for agregating calls into GPUVertBufs. */
267 struct DRWCallBuffer {
268   GPUVertBuf *buf;
269   GPUVertBuf *buf_select;
270   int count;
271 };
272 
/**
 * Values stored in DRWUniform.type.
 *
 * WARNING: when adding entries, make sure the bit length of
 * DRWUniform->type is still large enough.
 */
typedef enum {
  DRW_UNIFORM_INT = 0,
  DRW_UNIFORM_INT_COPY,
  DRW_UNIFORM_FLOAT,
  DRW_UNIFORM_FLOAT_COPY,
  DRW_UNIFORM_TEXTURE,
  DRW_UNIFORM_TEXTURE_REF,
  DRW_UNIFORM_IMAGE,
  DRW_UNIFORM_IMAGE_REF,
  DRW_UNIFORM_BLOCK,
  DRW_UNIFORM_BLOCK_REF,
  DRW_UNIFORM_TFEEDBACK_TARGET,

  /* -- Per drawcall uniforms/UBO -- */
  DRW_UNIFORM_BLOCK_OBMATS,
  DRW_UNIFORM_BLOCK_OBINFOS,
  DRW_UNIFORM_RESOURCE_CHUNK,
  DRW_UNIFORM_RESOURCE_ID,

  /* -- Legacy / Fallback -- */
  DRW_UNIFORM_BASE_INSTANCE,
  DRW_UNIFORM_MODEL_MATRIX,
  DRW_UNIFORM_MODEL_MATRIX_INVERSE,
} DRWUniformType;
298 
299 struct DRWUniform {
300   union {
301     /* For reference or array/vector types. */
302     const void *pvalue;
303     /* DRW_UNIFORM_TEXTURE */
304     struct {
305       union {
306         GPUTexture *texture;
307         GPUTexture **texture_ref;
308       };
309       eGPUSamplerState sampler_state;
310     };
311     /* DRW_UNIFORM_BLOCK */
312     union {
313       GPUUniformBuf *block;
314       GPUUniformBuf **block_ref;
315     };
316     /* DRW_UNIFORM_FLOAT_COPY */
317     float fvalue[4];
318     /* DRW_UNIFORM_INT_COPY */
319     int ivalue[4];
320   };
321   int location;      /* Uniform location or binding point for textures and ubos. */
322   uint8_t type;      /* DRWUniformType */
323   uint8_t length;    /* Length of vector types. */
324   uint8_t arraysize; /* Array size of scalar/vector types. */
325 };
326 
327 struct DRWShadingGroup {
328   DRWShadingGroup *next;
329 
330   GPUShader *shader;                /* Shader to bind */
331   struct DRWUniformChunk *uniforms; /* Uniforms pointers */
332 
333   struct {
334     /* Chunks of draw calls. */
335     struct DRWCommandChunk *first, *last;
336   } cmd;
337 
338   union {
339     /* This struct is used during cache populate. */
340     struct {
341       int objectinfo;                /* Equal to 1 if the shader needs obinfos. */
342       DRWResourceHandle pass_handle; /* Memblock key to parent pass. */
343     };
344     /* This struct is used after cache populate if using the Z sorting.
345      * It will not conflict with the above struct. */
346     struct {
347       float distance;      /* Distance from camera. */
348       uint original_index; /* Original position inside the shgroup list. */
349     } z_sorting;
350   };
351 };
352 
353 #define MAX_PASS_NAME 32
354 
355 struct DRWPass {
356   /* Linked list */
357   struct {
358     DRWShadingGroup *first;
359     DRWShadingGroup *last;
360   } shgroups;
361 
362   /* Draw the shgroups of this pass instead.
363    * This avoid duplicating drawcalls/shgroups
364    * for similar passes. */
365   DRWPass *original;
366   /* Link list of additional passes to render. */
367   DRWPass *next;
368 
369   DRWResourceHandle handle;
370   DRWState state;
371   char name[MAX_PASS_NAME];
372 };
373 
374 /* keep in sync with viewBlock */
375 typedef struct DRWViewUboStorage {
376   /* View matrices */
377   float persmat[4][4];
378   float persinv[4][4];
379   float viewmat[4][4];
380   float viewinv[4][4];
381   float winmat[4][4];
382   float wininv[4][4];
383 
384   float clipplanes[6][4];
385   float viewvecs[2][4];
386   /* Should not be here. Not view dependent (only main view). */
387   float viewcamtexcofac[4];
388 } DRWViewUboStorage;
389 
390 BLI_STATIC_ASSERT_ALIGN(DRWViewUboStorage, 16)
391 
392 #define MAX_CULLED_VIEWS 32
393 
394 struct DRWView {
395   /** Parent view if this is a sub view. NULL otherwise. */
396   struct DRWView *parent;
397 
398   DRWViewUboStorage storage;
399   /** Number of active clipplanes. */
400   int clip_planes_len;
401   /** Does culling result needs to be updated. */
402   bool is_dirty;
403   /** Does facing needs to be reversed? */
404   bool is_inverted;
405   /** Culling */
406   uint32_t culling_mask;
407   BoundBox frustum_corners;
408   BoundSphere frustum_bsphere;
409   float frustum_planes[6][4];
410   /** Custom visibility function. */
411   DRWCallVisibilityFn *visibility_fn;
412   void *user_data;
413 };
414 
415 /* ------------ Data Chunks --------------- */
/**
 * In order to keep a cache friendly data structure,
 * we allocate most of our small data into chunks of multiple items.
 * Iteration, allocation and memory usage are better.
 * We lose a bit of memory by allocating more than what we need
 * but it's counterbalanced by not needing the linked-list pointers
 * for each item.
 */
424 
425 typedef struct DRWUniformChunk {
426   struct DRWUniformChunk *next; /* single-linked list */
427   uint32_t uniform_len;
428   uint32_t uniform_used;
429   DRWUniform uniforms[10];
430 } DRWUniformChunk;
431 
432 typedef struct DRWCommandChunk {
433   struct DRWCommandChunk *next;
434   uint32_t command_len;
435   uint32_t command_used;
436   /* 4bits for each command. */
437   uint64_t command_type[6];
438   /* -- 64 bytes aligned -- */
439   DRWCommand commands[96];
440   /* -- 64 bytes aligned -- */
441 } DRWCommandChunk;
442 
443 typedef struct DRWCommandSmallChunk {
444   struct DRWCommandChunk *next;
445   uint32_t command_len;
446   uint32_t command_used;
447   /* 4bits for each command. */
448   /* TODO reduce size of command_type. */
449   uint64_t command_type[6];
450   DRWCommand commands[6];
451 } DRWCommandSmallChunk;
452 
453 /* Only true for 64-bit platforms. */
454 #ifdef __LP64__
455 BLI_STATIC_ASSERT_ALIGN(DRWCommandChunk, 16);
456 #endif
457 
458 /* ------------- DRAW DEBUG ------------ */
459 
/** Debug line, stored as a linked list node. */
typedef struct DRWDebugLine {
  /** Next element of the linked list. */
  struct DRWDebugLine *next;
  /** Two 3D positions. */
  float pos[2][3];
  float color[4];
} DRWDebugLine;
465 
/** Debug sphere, stored as a linked list node. */
typedef struct DRWDebugSphere {
  /** Next element of the linked list. */
  struct DRWDebugSphere *next;
  float mat[4][4];
  float color[4];
} DRWDebugSphere;
471 
472 /* ------------- DRAW MANAGER ------------ */
473 
/** Cannot be changed without modifying RST.bound_tex_slots. */
#define DST_MAX_SLOTS 64
/** GL_MAX_CLIP_PLANES is at least 6. */
#define MAX_CLIP_PLANES 6
#define STENCIL_UNDEFINED 256
#define DRW_DRAWLIST_LEN 256
478 typedef struct DRWManager {
479   /* TODO clean up this struct a bit */
480   /* Cache generation */
481   ViewportMemoryPool *vmempool;
482   DRWInstanceDataList *idatalist;
483   /* State of the object being evaluated if already allocated. */
484   DRWResourceHandle ob_handle;
485   /** True if current DST.ob_state has its matching DRWObjectInfos init. */
486   bool ob_state_obinfo_init;
487   /** Handle of current object resource in object resource arrays (DRWObjectMatrices/Infos). */
488   DRWResourceHandle resource_handle;
489   /** Handle of next DRWPass to be allocated. */
490   DRWResourceHandle pass_handle;
491 
492   /** Dupli state. NULL if not dupli. */
493   struct DupliObject *dupli_source;
494   struct Object *dupli_parent;
495   struct Object *dupli_origin;
496   /** Ghash containing original objects. */
497   struct GHash *dupli_ghash;
498   /** TODO(fclem): try to remove usage of this. */
499   DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE];
500   /* Array of dupli_data (one for each enabled engine) to handle duplis. */
501   void **dupli_datas;
502 
503   /* Rendering state */
504   GPUShader *shader;
505   GPUBatch *batch;
506 
507   /* Managed by `DRW_state_set`, `DRW_state_reset` */
508   DRWState state;
509   DRWState state_lock;
510 
511   /* Per viewport */
512   GPUViewport *viewport;
513   struct GPUFrameBuffer *default_framebuffer;
514   float size[2];
515   float inv_size[2];
516   float screenvecs[2][3];
517   float pixsize;
518 
519   struct {
520     uint is_select : 1;
521     uint is_depth : 1;
522     uint is_image_render : 1;
523     uint is_scene_render : 1;
524     uint do_color_management : 1;
525     uint draw_background : 1;
526     uint draw_text : 1;
527   } options;
528 
529   /* Current rendering context */
530   DRWContextState draw_ctx;
531 
532   /* Convenience pointer to text_store owned by the viewport */
533   struct DRWTextStore **text_store_p;
534 
535   ListBase enabled_engines; /* RenderEngineType */
536   void **vedata_array;      /* ViewportEngineData */
537   int enabled_engine_count; /* Length of enabled_engines list. */
538 
539   bool buffer_finish_called; /* Avoid bad usage of DRW_render_instance_buffer_finish */
540 
541   DRWView *view_default;
542   DRWView *view_active;
543   DRWView *view_previous;
544   uint primary_view_ct;
545   /** TODO(fclem): Remove this. Only here to support
546    * shaders without common_view_lib.glsl */
547   DRWViewUboStorage view_storage_cpy;
548 
549 #ifdef USE_GPU_SELECT
550   uint select_id;
551 #endif
552 
553   struct TaskGraph *task_graph;
554   /* Contains list of objects that needs to be extracted from other objects. */
555   struct GSet *delayed_extraction;
556 
557   /* ---------- Nothing after this point is cleared after use ----------- */
558 
559   /* gl_context serves as the offset for clearing only
560    * the top portion of the struct so DO NOT MOVE IT! */
561   /** Unique ghost context used by the draw manager. */
562   void *gl_context;
563   GPUContext *gpu_context;
564   /** Mutex to lock the drw manager and avoid concurrent context usage. */
565   TicketMutex *gl_context_mutex;
566 
567   GPUDrawList *draw_list;
568 
569   struct {
570     /* TODO(fclem): optimize: use chunks. */
571     DRWDebugLine *lines;
572     DRWDebugSphere *spheres;
573   } debug;
574 } DRWManager;
575 
576 extern DRWManager DST; /* TODO: get rid of this and allow multi-threaded rendering. */
577 
578 /* --------------- FUNCTIONS ------------- */
579 
580 void drw_texture_set_parameters(GPUTexture *tex, DRWTextureFlag flags);
581 
582 void *drw_viewport_engine_data_ensure(void *engine_type);
583 
584 void drw_state_set(DRWState state);
585 
586 void drw_debug_draw(void);
587 void drw_debug_init(void);
588 
589 eDRWCommandType command_type_get(const uint64_t *command_type_bits, int index);
590 
591 void drw_batch_cache_validate(Object *ob);
592 void drw_batch_cache_generate_requested(struct Object *ob);
593 void drw_batch_cache_generate_requested_delayed(Object *ob);
594 
595 void drw_resource_buffer_finish(ViewportMemoryPool *vmempool);
596 
597 /* Procedural Drawing */
598 GPUBatch *drw_cache_procedural_points_get(void);
599 GPUBatch *drw_cache_procedural_lines_get(void);
600 GPUBatch *drw_cache_procedural_triangles_get(void);
601