1 /**************************************************************************
2  *
3  * Copyright 2018-2019 Alyssa Rosenzweig
4  * Copyright 2018-2019 Collabora, Ltd.
5  * Copyright © 2015 Intel Corporation
6  * All Rights Reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the
10  * "Software"), to deal in the Software without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sub license, and/or sell copies of the Software, and to
13  * permit persons to whom the Software is furnished to do so, subject to
14  * the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the
17  * next paragraph) shall be included in all copies or substantial portions
18  * of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
23  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
24  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27  *
28  **************************************************************************/
29 
30 #ifndef PAN_DEVICE_H
31 #define PAN_DEVICE_H
32 
33 #include <xf86drm.h>
34 #include "renderonly/renderonly.h"
35 #include "util/u_dynarray.h"
36 #include "util/bitset.h"
37 #include "util/list.h"
38 #include "util/sparse_array.h"
39 
40 #include "panfrost/util/pan_ir.h"
41 #include "pan_pool.h"
42 #include "pan_util.h"
43 
44 #include <genxml/gen_macros.h>
45 
46 #if defined(__cplusplus)
47 extern "C" {
48 #endif
49 
/* Driver limits */
#define PAN_MAX_CONST_BUFFERS 16

/* How many power-of-two levels in the BO cache do we want? 2^12
 * minimum chosen as it is the page size that all allocations are
 * rounded to */

#define MIN_BO_CACHE_BUCKET (12) /* 2^12 = 4KB */
#define MAX_BO_CACHE_BUCKET (22) /* 2^22 = 4MB */

/* Fencepost problem, hence the off-by-one: both endpoints are inclusive */
#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
62 
/* Device-wide blitter state: caches of blit/blend shaders and of renderer
 * state descriptors (RSDs), each backed by its own pool. The mutexes
 * presumably serialize access to the adjacent tables/pools -- confirm
 * against the users in pan_blitter.c. */
struct pan_blitter {
        struct {
                /* Pool the shader binaries are allocated from */
                struct pan_pool *pool;
                /* Cached blit shaders */
                struct hash_table *blit;
                /* Cached blend shaders used by blits */
                struct hash_table *blend;
                pthread_mutex_t lock;
        } shaders;
        struct {
                /* Pool the RSDs are allocated from */
                struct pan_pool *pool;
                /* Cached renderer state descriptors */
                struct hash_table *rsds;
                pthread_mutex_t lock;
        } rsds;
};
76 
/* Device-wide cache of compiled blend shaders; the lock presumably guards
 * lookups/insertions into the table -- confirm against the users. */
struct pan_blend_shaders {
        struct hash_table *shaders;
        pthread_mutex_t lock;
};
81 
/* Selects which indirect-draw compute shader variant to use. The low
 * bits encode the index size, higher bits toggle optional features.
 * Values past PAN_INDIRECT_DRAW_FLAGS_MASK are not flag combinations but
 * sequential IDs for the min/max index-search shaders. */
enum pan_indirect_draw_flags {
        PAN_INDIRECT_DRAW_NO_INDEX = 0 << 0,
        PAN_INDIRECT_DRAW_1B_INDEX = 1 << 0,
        PAN_INDIRECT_DRAW_2B_INDEX = 2 << 0,
        PAN_INDIRECT_DRAW_4B_INDEX = 3 << 0,
        PAN_INDIRECT_DRAW_INDEX_SIZE_MASK = 3 << 0,
        PAN_INDIRECT_DRAW_HAS_PSIZ = 1 << 2,
        PAN_INDIRECT_DRAW_PRIMITIVE_RESTART = 1 << 3,
        PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE = 1 << 4,
        PAN_INDIRECT_DRAW_IDVS = 1 << 5,
        PAN_INDIRECT_DRAW_LAST_FLAG = PAN_INDIRECT_DRAW_IDVS,
        PAN_INDIRECT_DRAW_FLAGS_MASK = (PAN_INDIRECT_DRAW_LAST_FLAG << 1) - 1,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX = PAN_INDIRECT_DRAW_LAST_FLAG << 1,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX_PRIM_RESTART,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX_PRIM_RESTART,
        /* Fixed: index sizes are 1, 2 or 4 bytes (see the 1B/2B/4B
         * variants above); the previous "3B" name was a typo, and must be
         * 4B to mirror the non-prim-restart search variants. */
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX_PRIM_RESTART,
        PAN_INDIRECT_DRAW_NUM_SHADERS,
};
102 
/* Per-variant state for one compiled indirect-draw compute shader. */
struct pan_indirect_draw_shader {
        /* Push uniform layout used by this shader variant */
        struct panfrost_ubo_push push;
        /* GPU address of the shader's renderer state descriptor */
        mali_ptr rsd;
};
107 
/* Device-wide context for the indirect-draw compute shaders: one entry
 * per pan_indirect_draw_flags variant, plus the pools/BOs they share. */
struct pan_indirect_draw_shaders {
        /* One compiled shader per variant, indexed by its
         * pan_indirect_draw_flags value */
        struct pan_indirect_draw_shader shaders[PAN_INDIRECT_DRAW_NUM_SHADERS];

        /* Take the lock when initializing the draw shaders context or when
         * allocating from the binary pool.
         */
        pthread_mutex_t lock;

        /* A memory pool for shader binaries. We currently don't allocate a
         * single BO for all shaders up-front because estimating shader size
         * is not trivial, and changes to the compiler might influence this
         * estimation.
         */
        struct pan_pool *bin_pool;

        /* BO containing all renderer states attached to the compute shaders.
         * Those are built at shader compilation time and re-used every time
         * panfrost_emit_indirect_draw() is called.
         */
        struct panfrost_bo *states;

        /* Varying memory is allocated dynamically by compute jobs from this
         * heap.
         */
        struct panfrost_bo *varying_heap;
};
134 
/* Device-wide state for the indirect-dispatch compute shader. */
struct pan_indirect_dispatch {
        /* Push uniform layout used by the shader */
        struct panfrost_ubo_push push;
        struct panfrost_bo *bin;   /* shader binary */
        struct panfrost_bo *descs; /* descriptors attached to the shader */
};
140 
/** Implementation-defined tiler features, queried at device-open time */
struct panfrost_tiler_features {
        /** Number of bytes per tiler bin */
        unsigned bin_size;

        /** Maximum number of levels that may be simultaneously enabled.
         * Invariant: bitcount(hierarchy_mask) <= max_levels */
        unsigned max_levels;
};
150 
/* Static description of a known GPU model; see panfrost_get_model() */
struct panfrost_model {
        /* GPU ID */
        uint32_t gpu_id;

        /* Marketing name for the GPU, used as the GL_RENDERER */
        const char *name;

        /* Set of associated performance counters */
        const char *performance_counters;

        /* Minimum GPU revision required for anisotropic filtering. ~0 and 0
         * mean "no revisions support anisotropy" and "all revisions support
         * anisotropy" respectively -- so checking for anisotropy is simply
         * comparing the revision.
         */
        uint32_t min_rev_anisotropic;

        struct {
                /* The GPU lacks the capability for hierarchical tiling, without
                 * an "Advanced Tiling Unit", instead requiring a single bin
                 * size for the entire framebuffer be selected by the driver
                 */
                bool no_hierarchical_tiling;
        } quirks;
};
176 
/* Per-device state shared by all contexts on an opened DRM fd; populated
 * by panfrost_open_device() and torn down by panfrost_close_device(). */
struct panfrost_device {
        /* For ralloc */
        void *memctx;

        /* DRM device file descriptor */
        int fd;

        /* Properties of the GPU in use */
        unsigned arch;
        unsigned gpu_id;
        unsigned revision;
        unsigned core_count;
        unsigned thread_tls_alloc;
        struct panfrost_tiler_features tiler_features;
        const struct panfrost_model *model;
        bool has_afbc;

        /* Table of formats, indexed by a PIPE format */
        const struct panfrost_format *formats;

        /* Bitmask of supported compressed texture formats */
        uint32_t compressed_formats;

        /* debug flags, see pan_util.h how to interpret */
        unsigned debug;

        drmVersionPtr kernel_version;

        /* Render-only/display split handling (NULL when not renderonly) */
        struct renderonly *ro;

        /* Guards bo_map below */
        pthread_mutex_t bo_map_lock;
        /* GEM handle -> panfrost_bo map; see pan_lookup_bo() */
        struct util_sparse_array bo_map;

        struct {
                pthread_mutex_t lock;

                /* List containing all cached BOs sorted in LRU (Least
                 * Recently Used) order. This allows us to quickly evict BOs
                 * that are more than 1 second old.
                 */
                struct list_head lru;

                /* The BO cache is a set of buckets with power-of-two sizes
                 * ranging from 2^MIN_BO_CACHE_BUCKET (4096, the page size)
                 * to 2^MAX_BO_CACHE_BUCKET.
                 * Each bucket is a linked list of free panfrost_bo objects. */

                struct list_head buckets[NR_BO_CACHE_BUCKETS];
        } bo_cache;

        struct pan_blitter blitter;
        struct pan_blend_shaders blend_shaders;
        struct pan_indirect_draw_shaders indirect_draw_shaders;
        struct pan_indirect_dispatch indirect_dispatch;

        /* Tiler heap shared across all tiler jobs, allocated against the
         * device since there's only a single tiler. Since this is invisible to
         * the CPU, it's okay for multiple contexts to reference it
         * simultaneously; by keeping on the device struct, we eliminate a
         * costly per-context allocation. */

        struct panfrost_bo *tiler_heap;

        /* The tiler heap is shared by all contexts, and is written by tiler
         * jobs and read by fragment job. We need to ensure that a
         * vertex/tiler job chain from one context is not inserted between
         * the vertex/tiler and fragment job of another context, otherwise
         * we end up with tiler heap corruption.
         */
        pthread_mutex_t submit_lock;

        /* Sample positions are preloaded into a write-once constant buffer,
         * such that they can be referenced for free later. Needed
         * unconditionally on Bifrost, and useful for sharing with Midgard */

        struct panfrost_bo *sample_positions;
};
253 
/* Initializes *dev for the DRM device fd; memctx is used as the ralloc
 * context for internal allocations. Ownership of *dev stays with the
 * caller. */
void
panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev);

/* Tears down a device previously set up by panfrost_open_device() */
void
panfrost_close_device(struct panfrost_device *dev);

/* Whether the given compressed texture format is supported; presumably
 * consults dev->compressed_formats -- see the implementation */
bool
panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt);

/* Uploads the preloaded sample position table into
 * dev->sample_positions */
void
panfrost_upload_sample_positions(struct panfrost_device *dev);

/* GPU address of the preloaded sample positions for the given pattern */
mali_ptr
panfrost_sample_positions(const struct panfrost_device *dev,
                enum mali_sample_pattern pattern);
/* Writes the position of sample sample_idx of the given pattern into
 * out (presumably an x/y pair -- confirm with the implementation) */
void
panfrost_query_sample_position(
                enum mali_sample_pattern pattern,
                unsigned sample_idx,
                float *out);
274 
275 static inline struct panfrost_bo *
pan_lookup_bo(struct panfrost_device * dev,uint32_t gem_handle)276 pan_lookup_bo(struct panfrost_device *dev, uint32_t gem_handle)
277 {
278         return (struct panfrost_bo *)util_sparse_array_get(&dev->bo_map, gem_handle);
279 }
280 
281 static inline bool
pan_is_bifrost(const struct panfrost_device * dev)282 pan_is_bifrost(const struct panfrost_device *dev)
283 {
284         return dev->arch >= 6 && dev->arch <= 7;
285 }
286 
287 const struct panfrost_model * panfrost_get_model(uint32_t gpu_id);
288 
289 #if defined(__cplusplus)
290 } // extern "C"
291 #endif
292 
293 #endif
294