1 /*
2  * Copyright (C) 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include <xf86drm.h>
28 
29 #include "util/u_math.h"
30 #include "util/macros.h"
31 #include "util/hash_table.h"
32 #include "util/u_thread.h"
33 #include "drm-uapi/panfrost_drm.h"
34 #include "pan_encoder.h"
35 #include "pan_device.h"
36 #include "panfrost-quirks.h"
37 #include "pan_bo.h"
38 #include "pan_texture.h"
39 #include "wrap.h"
40 #include "pan_util.h"
41 
42 /* Abstraction over the raw drm_panfrost_get_param ioctl for fetching
43  * information about devices */
44 
45 static __u64
panfrost_query_raw(int fd,enum drm_panfrost_param param,bool required,unsigned default_value)46 panfrost_query_raw(
47                 int fd,
48                 enum drm_panfrost_param param,
49                 bool required,
50                 unsigned default_value)
51 {
52         struct drm_panfrost_get_param get_param = {0,};
53         ASSERTED int ret;
54 
55         get_param.param = param;
56         ret = drmIoctl(fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param);
57 
58         if (ret) {
59                 assert(!required);
60                 return default_value;
61         }
62 
63         return get_param.value;
64 }
65 
66 static unsigned
panfrost_query_gpu_version(int fd)67 panfrost_query_gpu_version(int fd)
68 {
69 #ifndef NDEBUG
70         /* In debug builds, allow overriding the GPU ID, for example to run
71          * Bifrost shader-db on a Midgard machine. This is a bit less heavy
72          * handed than setting up the entirety of drm-shim */
73         char *override_version = getenv("PAN_GPU_ID");
74 
75         if (override_version)
76                 return strtol(override_version, NULL, 16);
77 #endif
78 
79         return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0);
80 }
81 
82 static unsigned
panfrost_query_gpu_revision(int fd)83 panfrost_query_gpu_revision(int fd)
84 {
85         return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_REVISION, true, 0);
86 }
87 
88 static struct panfrost_tiler_features
panfrost_query_tiler_features(int fd)89 panfrost_query_tiler_features(int fd)
90 {
91         /* Default value (2^9 bytes and 8 levels) to match old behaviour */
92         uint32_t raw = panfrost_query_raw(fd, DRM_PANFROST_PARAM_TILER_FEATURES,
93                         false, 0x809);
94 
95         /* Bin size is log2 in the first byte, max levels in the second byte */
96         return (struct panfrost_tiler_features) {
97                 .bin_size = (1 << (raw & BITFIELD_MASK(5))),
98                 .max_levels = (raw >> 8) & BITFIELD_MASK(4)
99         };
100 }
101 
102 static unsigned
panfrost_query_core_count(int fd)103 panfrost_query_core_count(int fd)
104 {
105         /* On older kernels, worst-case to 16 cores */
106 
107         unsigned mask = panfrost_query_raw(fd,
108                         DRM_PANFROST_PARAM_SHADER_PRESENT, false, 0xffff);
109 
110         /* Some cores might be absent. For TLS computation purposes, we care
111          * about the greatest ID + 1, which equals the core count if all cores
112          * are present, but allocates space for absent cores if needed.
113          * util_last_bit is defined to return the greatest bit set + 1, which
114          * is exactly what we need. */
115 
116         return util_last_bit(mask);
117 }
118 
/* Architectural maximum thread count for a given architecture major version.
 * Used as a fallback, since the corresponding register may not be
 * implemented by a given chip. G31 is actually 512 instead of 768, but the
 * difference doesn't really matter. */

static unsigned
panfrost_max_thread_count(unsigned arch)
{
        /* Midgard */
        if (arch == 4 || arch == 5)
                return 256;

        /* Bifrost, first generation */
        if (arch == 6)
                return 384;

        /* Bifrost, second generation */
        if (arch == 7)
                return 768;

        /* Everything else is treated as Valhall (for completeness) */
        return 1024;
}
145 
146 static unsigned
panfrost_query_thread_tls_alloc(int fd,unsigned major)147 panfrost_query_thread_tls_alloc(int fd, unsigned major)
148 {
149         unsigned tls = panfrost_query_raw(fd,
150                         DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, false, 0);
151 
152         return (tls > 0) ? tls : panfrost_max_thread_count(major);
153 }
154 
155 static uint32_t
panfrost_query_compressed_formats(int fd)156 panfrost_query_compressed_formats(int fd)
157 {
158         /* If unspecified, assume ASTC/ETC only. Factory default for Juno, and
159          * should exist on any Mali configuration. All hardware should report
160          * these texture formats but the kernel might not be new enough. */
161 
162         uint32_t default_set =
163                 (1 << MALI_ETC2_RGB8) |
164                 (1 << MALI_ETC2_R11_UNORM) |
165                 (1 << MALI_ETC2_RGBA8) |
166                 (1 << MALI_ETC2_RG11_UNORM) |
167                 (1 << MALI_ETC2_R11_SNORM) |
168                 (1 << MALI_ETC2_RG11_SNORM) |
169                 (1 << MALI_ETC2_RGB8A1) |
170                 (1 << MALI_ASTC_3D_LDR) |
171                 (1 << MALI_ASTC_3D_HDR) |
172                 (1 << MALI_ASTC_2D_LDR) |
173                 (1 << MALI_ASTC_2D_HDR);
174 
175         return panfrost_query_raw(fd, DRM_PANFROST_PARAM_TEXTURE_FEATURES0,
176                         false, default_set);
177 }
178 
179 /* DRM_PANFROST_PARAM_TEXTURE_FEATURES0 will return a bitmask of supported
180  * compressed formats, so we offer a helper to test if a format is supported */
181 
182 bool
panfrost_supports_compressed_format(struct panfrost_device * dev,unsigned fmt)183 panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt)
184 {
185         if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED)
186                 return true;
187 
188         unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED;
189         assert(idx < 32);
190 
191         return dev->compressed_formats & (1 << idx);
192 }
193 
/* Given a GPU ID like 0x860, return a prettified model name. The returned
 * string is a literal and must not be freed. IDs that are not in the table
 * hit unreachable(). */

const char *
panfrost_model_name(unsigned gpu_id)
{
        static const struct {
                unsigned id;
                const char *name;
        } models[] = {
                { 0x600,  "Mali-T600 (Panfrost)" },
                { 0x620,  "Mali-T620 (Panfrost)" },
                { 0x720,  "Mali-T720 (Panfrost)" },
                { 0x820,  "Mali-T820 (Panfrost)" },
                { 0x830,  "Mali-T830 (Panfrost)" },
                /* NOTE(review): ID 0x750 is named T760 here; presumably
                 * that is the real product ID for the T760 -- confirm */
                { 0x750,  "Mali-T760 (Panfrost)" },
                { 0x860,  "Mali-T860 (Panfrost)" },
                { 0x880,  "Mali-T880 (Panfrost)" },
                { 0x6221, "Mali-G72 (Panfrost)" },
                { 0x7093, "Mali-G31 (Panfrost)" },
                { 0x7212, "Mali-G52 (Panfrost)" },
                { 0x7402, "Mali-G52 r1 (Panfrost)" },
        };

        for (unsigned i = 0; i < sizeof(models) / sizeof(models[0]); ++i) {
                if (models[i].id == gpu_id)
                        return models[i].name;
        }

        unreachable("Invalid GPU ID");
}
216 
217 /* Check for AFBC hardware support. AFBC is introduced in v5. Implementations
218  * may omit it, signaled as a nonzero value in the AFBC_FEATURES property. */
219 
220 static bool
panfrost_query_afbc(int fd,unsigned arch)221 panfrost_query_afbc(int fd, unsigned arch)
222 {
223         unsigned reg = panfrost_query_raw(fd,
224                                           DRM_PANFROST_PARAM_AFBC_FEATURES,
225                                           false, 0);
226 
227         return (arch >= 5) && (reg == 0);
228 }
229 
230 void
panfrost_open_device(void * memctx,int fd,struct panfrost_device * dev)231 panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
232 {
233         dev->fd = fd;
234         dev->memctx = memctx;
235         dev->gpu_id = panfrost_query_gpu_version(fd);
236         dev->arch = pan_arch(dev->gpu_id);
237         dev->core_count = panfrost_query_core_count(fd);
238         dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(fd, dev->arch);
239         dev->kernel_version = drmGetVersion(fd);
240         unsigned revision = panfrost_query_gpu_revision(fd);
241         dev->quirks = panfrost_get_quirks(dev->gpu_id, revision);
242         dev->compressed_formats = panfrost_query_compressed_formats(fd);
243         dev->tiler_features = panfrost_query_tiler_features(fd);
244         dev->has_afbc = panfrost_query_afbc(fd, dev->arch);
245 
246         if (dev->quirks & HAS_SWIZZLES)
247                 dev->formats = panfrost_pipe_format_v6;
248         else
249                 dev->formats = panfrost_pipe_format_v7;
250 
251         util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512);
252 
253         pthread_mutex_init(&dev->bo_cache.lock, NULL);
254         list_inithead(&dev->bo_cache.lru);
255 
256         for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
257                 list_inithead(&dev->bo_cache.buckets[i]);
258 
259         /* Initialize pandecode before we start allocating */
260         if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
261                 pandecode_initialize(!(dev->debug & PAN_DBG_TRACE));
262 
263         /* Tiler heap is internally required by the tiler, which can only be
264          * active for a single job chain at once, so a single heap can be
265          * shared across batches/contextes */
266 
267         dev->tiler_heap = panfrost_bo_create(dev, 64 * 1024 * 1024,
268                         PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap");
269 
270         pthread_mutex_init(&dev->submit_lock, NULL);
271 
272         /* Done once on init */
273         panfrost_upload_sample_positions(dev);
274 }
275 
276 void
panfrost_close_device(struct panfrost_device * dev)277 panfrost_close_device(struct panfrost_device *dev)
278 {
279         pthread_mutex_destroy(&dev->submit_lock);
280         panfrost_bo_unreference(dev->tiler_heap);
281         panfrost_bo_cache_evict_all(dev);
282         pthread_mutex_destroy(&dev->bo_cache.lock);
283         drmFreeVersion(dev->kernel_version);
284         util_sparse_array_finish(&dev->bo_map);
285         close(dev->fd);
286 }
287