1 /*
2  * This file is part of libplacebo.
3  *
4  * libplacebo is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * libplacebo is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <strings.h>
19 
20 #include "log.h"
21 #include "common.h"
22 #include "gpu.h"
23 
#define MAX_COMPS 4 // maximum number of components described by a plane

// Describes a single color component inside a packed pixel word
struct comp {
    int order; // e.g. 0, 1, 2, 3 for RGBA
    int size;  // size in bits
    int shift; // bit-shift / offset in bits
};
31 
compare_comp(const void * pa,const void * pb)32 static int compare_comp(const void *pa, const void *pb)
33 {
34     const struct comp *a = pa, *b = pb;
35 
36     // Move all of the components with a size of 0 to the end, so they can
37     // be ignored outright
38     if (a->size && !b->size)
39         return -1;
40     if (b->size && !a->size)
41         return 1;
42 
43     // Otherwise, just compare based on the shift
44     return PL_CMP(a->shift, b->shift);
45 }
46 
pl_plane_data_from_mask(struct pl_plane_data * data,uint64_t mask[4])47 void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4])
48 {
49     struct comp comps[MAX_COMPS] = { {0}, {1}, {2}, {3} };
50 
51     for (int i = 0; i < PL_ARRAY_SIZE(comps); i++) {
52         comps[i].size = __builtin_popcountll(mask[i]);
53         comps[i].shift = PL_MAX(0, __builtin_ffsll(mask[i]) - 1);
54 
55         // Sanity checking
56         uint64_t mask_reconstructed = (1LLU << comps[i].size) - 1;
57         mask_reconstructed <<= comps[i].shift;
58         pl_assert(mask_reconstructed == mask[i]);
59     }
60 
61     // Sort the components by shift
62     qsort(comps, MAX_COMPS, sizeof(struct comp), compare_comp);
63 
64     // Generate the resulting component size/pad/map
65     int offset = 0;
66     for (int i = 0; i < MAX_COMPS; i++)  {
67         if (comps[i].size) {
68             assert(comps[i].shift >= offset);
69             data->component_size[i] = comps[i].size;
70             data->component_pad[i] = comps[i].shift - offset;
71             data->component_map[i] = comps[i].order;
72             offset += data->component_size[i] + data->component_pad[i];
73         } else {
74             // Clear the superfluous entries for sanity
75             data->component_size[i] = 0;
76             data->component_pad[i] = 0;
77             data->component_map[i] = 0;
78         }
79     }
80 }
81 
// Try to expand each (non-alpha) component of `data` to a byte-aligned
// sample by absorbing neighboring padding bits, so the plane can be
// represented by a standard texture format. On success, `*data` is updated
// in place and `*out_bits` (if given) describes the resulting bit encoding
// (color depth, sample depth, bit shift). On failure, `data` is untouched,
// `*out_bits` is zeroed, and false is returned.
bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits)
{
    // Operate on a copy so a failed attempt leaves `*data` unmodified
    struct pl_plane_data aligned = *data;
    struct pl_bit_encoding bits = {0};

    int offset = 0;

// Set `var` on the first component (offset == 0), and require all later
// components to agree — mismatched encodings can't be expressed by a single
// pl_bit_encoding, so bail out to `misaligned`
#define SET_TEST(var, value)                \
    do {                                    \
        if (offset == 0) {                  \
            (var) = (value);                \
        } else if ((var) != (value)) {      \
            goto misaligned;                \
        }                                   \
    } while (0)

    for (int i = 0; i < MAX_COMPS; i++) {
        // Components are packed contiguously; the first empty slot ends them
        if (!aligned.component_size[i])
            break;

        // Can't meaningfully align alpha channel, so just skip it. This is a
        // limitation of the fact that `pl_bit_encoding` only applies to the
        // main color channels, and changing this would be very nontrivial.
        if (aligned.component_map[i] == PL_CHANNEL_A)
            continue;

        // Color depth is the original component size, before alignment
        SET_TEST(bits.color_depth, aligned.component_size[i]);

        // Try consuming padding of the current component to align down. This
        // corresponds to an extra bit shift to the left.
        int comp_start = offset + aligned.component_pad[i];
        int left_delta = comp_start - PL_ALIGN2(comp_start - 7, 8);
        left_delta = PL_MIN(left_delta, aligned.component_pad[i]);
        aligned.component_pad[i] -= left_delta;
        aligned.component_size[i] += left_delta;
        SET_TEST(bits.bit_shift, left_delta);

        // Try consuming padding of the next component to align up. This
        // corresponds to simply ignoring some extra 0s on the end.
        int comp_end = comp_start + aligned.component_size[i] - left_delta;
        int right_delta = PL_ALIGN2(comp_end, 8) - comp_end;
        if (i+1 == MAX_COMPS || !aligned.component_size[i+1]) {
            // This is the last component, so we can be greedy
            aligned.component_size[i] += right_delta;
        } else {
            right_delta = PL_MIN(right_delta, aligned.component_pad[i+1]);
            aligned.component_pad[i+1] -= right_delta;
            aligned.component_size[i] += right_delta;
        }

        // Sample depth is the new total component size, including padding
        SET_TEST(bits.sample_depth, aligned.component_size[i]);

        offset += aligned.component_pad[i] + aligned.component_size[i];
    }

    // Easy sanity check, to make sure that we don't exceed the known stride
    if (aligned.pixel_stride && offset > aligned.pixel_stride * 8)
        goto misaligned;

    *data = aligned;
    if (out_bits)
        *out_bits = bits;
    return true;

misaligned:
    // Can't properly align anything, so just do a no-op
    if (out_bits)
        *out_bits = (struct pl_bit_encoding) {0};
    return false;
}
154 
pl_plane_find_fmt(pl_gpu gpu,int out_map[4],const struct pl_plane_data * data)155 pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data)
156 {
157     int dummy[4] = {0};
158     out_map = PL_DEF(out_map, dummy);
159 
160     // Count the number of components and initialize out_map
161     int num = 0;
162     for (int i = 0; i < PL_ARRAY_SIZE(data->component_size); i++) {
163         out_map[i] = -1;
164         if (data->component_size[i])
165             num = i+1;
166     }
167 
168     for (int n = 0; n < gpu->num_formats; n++) {
169         pl_fmt fmt = gpu->formats[n];
170         if (fmt->opaque || fmt->num_components < num)
171             continue;
172         if (fmt->type != data->type || fmt->texel_size != data->pixel_stride)
173             continue;
174         if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE))
175             continue;
176 
177         int idx = 0;
178 
179         // Try mapping all pl_plane_data components to texture components
180         for (int i = 0; i < num; i++) {
181             // If there's padding we have to map it to an unused physical
182             // component first
183             int pad = data->component_pad[i];
184             if (pad && (idx >= 4 || fmt->host_bits[idx++] != pad))
185                 goto next_fmt;
186 
187             // Otherwise, try and match this component
188             int size = data->component_size[i];
189             if (size && (idx >= 4 || fmt->host_bits[idx] != size))
190                 goto next_fmt;
191             out_map[idx++] = data->component_map[i];
192         }
193 
194         return fmt;
195 
196 next_fmt: ; // acts as `continue`
197     }
198 
199     return NULL;
200 }
201 
// (Re)create `*tex` with a format compatible with `data` and upload the
// plane's raw pixels to it, filling `out_plane` (if given) with the texture
// and component mapping. Returns false if no compatible format exists or
// any GPU operation fails.
bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane,
                     pl_tex *tex, const struct pl_plane_data *data)
{
    pl_assert(!data->buf ^ !data->pixels); // exactly one

    if (data->buf) {
        // Buffer-based uploads must start on a 4-byte and pixel boundary
        pl_assert(data->buf_offset == PL_ALIGN2(data->buf_offset, 4));
        pl_assert(data->buf_offset == PL_ALIGN(data->buf_offset, data->pixel_stride));
    }

    // NOTE(review): assumes data->pixel_stride is nonzero — a zero stride
    // would divide by zero below; presumably guaranteed by the caller
    size_t row_stride = PL_DEF(data->row_stride, data->pixel_stride * data->width);
    unsigned int stride_texels = row_stride / data->pixel_stride;
    if (stride_texels * data->pixel_stride != row_stride) {
        PL_ERR(gpu, "data->row_stride must be a multiple of data->pixel_stride!");
        return false;
    }

    int out_map[4];
    pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data);
    if (!fmt) {
        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
        return false;

        // TODO: try soft-converting to a supported format using e.g zimg?
    }

    // (Re)create the texture in the matched format; blitting is only
    // advertised when the format supports it
    bool ok = pl_tex_recreate(gpu, tex, &(struct pl_tex_params) {
        .w = data->width,
        .h = data->height,
        .format = fmt,
        .sampleable = true,
        .host_writable = true,
        .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE,
    });

    if (!ok) {
        PL_ERR(gpu, "Failed initializing plane texture!");
        return false;
    }

    if (out_plane) {
        out_plane->texture = *tex;
        // `components` ends up as 1 + the highest mapped component index
        out_plane->components = 0;
        for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) {
            out_plane->component_mapping[i] = out_map[i];
            if (out_map[i] >= 0)
                out_plane->components = i+1;
        }
    }

    return pl_tex_upload(gpu, &(struct pl_tex_transfer_params) {
        .tex        = *tex,
        .stride_w   = stride_texels,
        .ptr        = (void *) data->pixels,
        .buf        = data->buf,
        .buf_offset = data->buf_offset,
        .callback   = data->callback,
        .priv       = data->priv,
    });
}
262 
// (Re)create `*tex` as a renderable/host-readable target with a format
// compatible with `data`, filling `out_plane` (if given) with the texture
// and component mapping. No pixel data is uploaded. Returns false if no
// compatible format exists or texture creation fails.
bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane,
                       pl_tex *tex, const struct pl_plane_data *data)
{
    int map[4];
    pl_fmt fmt = pl_plane_find_fmt(gpu, map, data);
    if (!fmt) {
        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
        return false;
    }

    // Optional capabilities are only requested when the format supports them
    if (!pl_tex_recreate(gpu, tex, &(struct pl_tex_params) {
        .w = data->width,
        .h = data->height,
        .format = fmt,
        .renderable = true,
        .host_readable = true,
        .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
        .storable = fmt->caps & PL_FMT_CAP_STORABLE,
    }))
    {
        PL_ERR(gpu, "Failed initializing plane texture!");
        return false;
    }

    if (out_plane) {
        out_plane->texture = *tex;
        // `components` becomes 1 + the highest mapped component index
        out_plane->components = 0;
        for (int c = 0; c < PL_ARRAY_SIZE(map); c++) {
            out_plane->component_mapping[c] = map[c];
            if (map[c] >= 0)
                out_plane->components = c + 1;
        }
    }

    return true;
}
300