1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * The Original Code is Copyright (C) 2016 by Mike Erwin.
17  * All rights reserved.
18  */
19 
20 /** \file
21  * \ingroup gpu
22  *
23  * GPU vertex format
24  */
25 
26 #include "GPU_vertex_format.h"
27 #include "gpu_shader_private.hh"
28 #include "gpu_vertex_format_private.h"
29 
30 #include <stddef.h>
31 #include <string.h>
32 
33 #include "BLI_ghash.h"
34 #include "BLI_string.h"
35 #include "BLI_utildefines.h"
36 
37 #define PACK_DEBUG 0
38 
39 #if PACK_DEBUG
40 #  include <stdio.h>
41 #endif
42 
43 using namespace blender::gpu;
44 
GPU_vertformat_clear(GPUVertFormat * format)45 void GPU_vertformat_clear(GPUVertFormat *format)
46 {
47 #if TRUST_NO_ONE
48   memset(format, 0, sizeof(GPUVertFormat));
49 #else
50   format->attr_len = 0;
51   format->packed = false;
52   format->name_offset = 0;
53   format->name_len = 0;
54   format->deinterleaved = false;
55 
56   for (uint i = 0; i < GPU_VERT_ATTR_MAX_LEN; i++) {
57     format->attrs[i].name_len = 0;
58   }
59 #endif
60 }
61 
GPU_vertformat_copy(GPUVertFormat * dest,const GPUVertFormat * src)62 void GPU_vertformat_copy(GPUVertFormat *dest, const GPUVertFormat *src)
63 {
64   /* copy regular struct fields */
65   memcpy(dest, src, sizeof(GPUVertFormat));
66 }
67 
comp_sz(GPUVertCompType type)68 static uint comp_sz(GPUVertCompType type)
69 {
70 #if TRUST_NO_ONE
71   assert(type <= GPU_COMP_F32); /* other types have irregular sizes (not bytes) */
72 #endif
73   const uint sizes[] = {1, 1, 2, 2, 4, 4, 4};
74   return sizes[type];
75 }
76 
attr_sz(const GPUVertAttr * a)77 static uint attr_sz(const GPUVertAttr *a)
78 {
79   if (a->comp_type == GPU_COMP_I10) {
80     return 4; /* always packed as 10_10_10_2 */
81   }
82   return a->comp_len * comp_sz(static_cast<GPUVertCompType>(a->comp_type));
83 }
84 
attr_align(const GPUVertAttr * a)85 static uint attr_align(const GPUVertAttr *a)
86 {
87   if (a->comp_type == GPU_COMP_I10) {
88     return 4; /* always packed as 10_10_10_2 */
89   }
90   uint c = comp_sz(static_cast<GPUVertCompType>(a->comp_type));
91   if (a->comp_len == 3 && c <= 2) {
92     return 4 * c; /* AMD HW can't fetch these well, so pad it out (other vendors too?) */
93   }
94 
95   return c; /* most fetches are ok if components are naturally aligned */
96 }
97 
vertex_buffer_size(const GPUVertFormat * format,uint vertex_len)98 uint vertex_buffer_size(const GPUVertFormat *format, uint vertex_len)
99 {
100 #if TRUST_NO_ONE
101   assert(format->packed && format->stride > 0);
102 #endif
103   return format->stride * vertex_len;
104 }
105 
copy_attr_name(GPUVertFormat * format,const char * name)106 static uchar copy_attr_name(GPUVertFormat *format, const char *name)
107 {
108   /* strncpy does 110% of what we need; let's do exactly 100% */
109   uchar name_offset = format->name_offset;
110   char *name_copy = format->names + name_offset;
111   uint available = GPU_VERT_ATTR_NAMES_BUF_LEN - name_offset;
112   bool terminated = false;
113 
114   for (uint i = 0; i < available; i++) {
115     const char c = name[i];
116     name_copy[i] = c;
117     if (c == '\0') {
118       terminated = true;
119       format->name_offset += (i + 1);
120       break;
121     }
122   }
123 #if TRUST_NO_ONE
124   assert(terminated);
125   assert(format->name_offset <= GPU_VERT_ATTR_NAMES_BUF_LEN);
126 #else
127   (void)terminated;
128 #endif
129   return name_offset;
130 }
131 
GPU_vertformat_attr_add(GPUVertFormat * format,const char * name,GPUVertCompType comp_type,uint comp_len,GPUVertFetchMode fetch_mode)132 uint GPU_vertformat_attr_add(GPUVertFormat *format,
133                              const char *name,
134                              GPUVertCompType comp_type,
135                              uint comp_len,
136                              GPUVertFetchMode fetch_mode)
137 {
138 #if TRUST_NO_ONE
139   assert(format->name_len < GPU_VERT_FORMAT_MAX_NAMES); /* there's room for more */
140   assert(format->attr_len < GPU_VERT_ATTR_MAX_LEN);     /* there's room for more */
141   assert(!format->packed);                              /* packed means frozen/locked */
142   assert((comp_len >= 1 && comp_len <= 4) || comp_len == 8 || comp_len == 12 || comp_len == 16);
143 
144   switch (comp_type) {
145     case GPU_COMP_F32:
146       /* float type can only kept as float */
147       assert(fetch_mode == GPU_FETCH_FLOAT);
148       break;
149     case GPU_COMP_I10:
150       /* 10_10_10 format intended for normals (xyz) or colors (rgb)
151        * extra component packed.w can be manually set to { -2, -1, 0, 1 } */
152       assert(comp_len == 3 || comp_len == 4);
153 
154       /* Not strictly required, may relax later. */
155       assert(fetch_mode == GPU_FETCH_INT_TO_FLOAT_UNIT);
156 
157       break;
158     default:
159       /* integer types can be kept as int or converted/normalized to float */
160       assert(fetch_mode != GPU_FETCH_FLOAT);
161       /* only support float matrices (see Batch_update_program_bindings) */
162       assert(comp_len != 8 && comp_len != 12 && comp_len != 16);
163   }
164 #endif
165   format->name_len++; /* multiname support */
166 
167   const uint attr_id = format->attr_len++;
168   GPUVertAttr *attr = &format->attrs[attr_id];
169 
170   attr->names[attr->name_len++] = copy_attr_name(format, name);
171   attr->comp_type = comp_type;
172   attr->comp_len = (comp_type == GPU_COMP_I10) ?
173                        4 :
174                        comp_len; /* system needs 10_10_10_2 to be 4 or BGRA */
175   attr->sz = attr_sz(attr);
176   attr->offset = 0; /* offsets & stride are calculated later (during pack) */
177   attr->fetch_mode = fetch_mode;
178 
179   return attr_id;
180 }
181 
GPU_vertformat_alias_add(GPUVertFormat * format,const char * alias)182 void GPU_vertformat_alias_add(GPUVertFormat *format, const char *alias)
183 {
184   GPUVertAttr *attr = &format->attrs[format->attr_len - 1];
185 #if TRUST_NO_ONE
186   assert(format->name_len < GPU_VERT_FORMAT_MAX_NAMES); /* there's room for more */
187   assert(attr->name_len < GPU_VERT_ATTR_MAX_NAMES);
188 #endif
189   format->name_len++; /* multiname support */
190   attr->names[attr->name_len++] = copy_attr_name(format, alias);
191 }
192 
193 /**
194  * Makes vertex attribute from the next vertices to be accessible in the vertex shader.
195  * For an attribute named "attr" you can access the next nth vertex using "attr{number}".
196  * Use this function after specifying all the attributes in the format.
197  *
198  * NOTE: This does NOT work when using indexed rendering.
199  * NOTE: Only works for first attribute name. (this limitation can be changed if needed)
200  *
201  * WARNING: this function creates a lot of aliases/attributes, make sure to keep the attribute
202  * name short to avoid overflowing the name-buffer.
203  * */
GPU_vertformat_multiload_enable(GPUVertFormat * format,int load_count)204 void GPU_vertformat_multiload_enable(GPUVertFormat *format, int load_count)
205 {
206   /* Sanity check. Maximum can be upgraded if needed. */
207   BLI_assert(load_count > 1 && load_count < 5);
208   /* We need a packed format because of format->stride. */
209   if (!format->packed) {
210     VertexFormat_pack(format);
211   }
212 
213   BLI_assert((format->name_len + 1) * load_count < GPU_VERT_FORMAT_MAX_NAMES);
214   BLI_assert(format->attr_len * load_count <= GPU_VERT_ATTR_MAX_LEN);
215   BLI_assert(format->name_offset * load_count < GPU_VERT_ATTR_NAMES_BUF_LEN);
216 
217   const GPUVertAttr *attr = format->attrs;
218   int attr_len = format->attr_len;
219   for (int i = 0; i < attr_len; i++, attr++) {
220     const char *attr_name = GPU_vertformat_attr_name_get(format, attr, 0);
221     for (int j = 1; j < load_count; j++) {
222       char load_name[64];
223       BLI_snprintf(load_name, sizeof(load_name), "%s%d", attr_name, j);
224       GPUVertAttr *dst_attr = &format->attrs[format->attr_len++];
225       *dst_attr = *attr;
226 
227       dst_attr->names[0] = copy_attr_name(format, load_name);
228       dst_attr->name_len = 1;
229       dst_attr->offset += format->stride * j;
230     }
231   }
232 }
233 
GPU_vertformat_attr_id_get(const GPUVertFormat * format,const char * name)234 int GPU_vertformat_attr_id_get(const GPUVertFormat *format, const char *name)
235 {
236   for (int i = 0; i < format->attr_len; i++) {
237     const GPUVertAttr *attr = &format->attrs[i];
238     for (int j = 0; j < attr->name_len; j++) {
239       const char *attr_name = GPU_vertformat_attr_name_get(format, attr, j);
240       if (STREQ(name, attr_name)) {
241         return i;
242       }
243     }
244   }
245   return -1;
246 }
247 
GPU_vertformat_attr_rename(GPUVertFormat * format,int attr_id,const char * new_name)248 void GPU_vertformat_attr_rename(GPUVertFormat *format, int attr_id, const char *new_name)
249 {
250   BLI_assert(attr_id > -1 && attr_id < format->attr_len);
251   GPUVertAttr *attr = &format->attrs[attr_id];
252   char *attr_name = (char *)GPU_vertformat_attr_name_get(format, attr, 0);
253   BLI_assert(strlen(attr_name) == strlen(new_name));
254   int i = 0;
255   while (attr_name[i] != '\0') {
256     attr_name[i] = new_name[i];
257     i++;
258   }
259   attr->name_len = 1;
260 }
261 
/**
 * Encode 8 original bytes into 11 safe bytes.
 *
 * The 8 input bytes are read as one 64-bit integer in native byte order and written out as
 * 11 base-63 digits, least significant digit first, using only letters, digits and '_'.
 * 63^11 is larger than 2^64, so 11 digits are enough for any input.
 *
 * \param out: receives exactly 11 characters; no terminator is written.
 * \param data: the 8 bytes to encode; no particular alignment is required.
 */
static void safe_bytes(char out[11], const char data[8])
{
  static const char safe_chars[] =
      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
  const uint64_t base = 63;

  /* Load through memcpy: dereferencing `data` as a `uint64_t *` would violate strict aliasing
   * and could be an unaligned access, since the caller only provides a char array. */
  uint64_t in;
  memcpy(&in, data, sizeof(in));

  for (int i = 0; i < 11; i++) {
    /* Encoding in base63 */
    out[i] = safe_chars[in % base];
    in /= base;
  }
}
274 
275 /* Warning: Always add a prefix to the result of this function as
276  * the generated string can start with a number and not be a valid attribute name. */
GPU_vertformat_safe_attr_name(const char * attr_name,char * r_safe_name,uint UNUSED (max_len))277 void GPU_vertformat_safe_attr_name(const char *attr_name, char *r_safe_name, uint UNUSED(max_len))
278 {
279   char data[8] = {0};
280   uint len = strlen(attr_name);
281 
282   if (len > 8) {
283     /* Start with the first 4 chars of the name; */
284     for (int i = 0; i < 4; i++) {
285       data[i] = attr_name[i];
286     }
287     /* We use a hash to identify each data layer based on its name.
288      * NOTE: This is still prone to hash collision but the risks are very low.*/
289     /* Start hashing after the first 2 chars. */
290     *(uint *)&data[4] = BLI_ghashutil_strhash_p_murmur(attr_name + 4);
291   }
292   else {
293     /* Copy the whole name. Collision is barely possible
294      * (hash would have to be equal to the last 4 bytes). */
295     for (int i = 0; i < 8 && attr_name[i] != '\0'; i++) {
296       data[i] = attr_name[i];
297     }
298   }
299   /* Convert to safe bytes characters. */
300   safe_bytes(r_safe_name, data);
301   /* End the string */
302   r_safe_name[11] = '\0';
303 
304   BLI_assert(GPU_MAX_SAFE_ATTR_NAME >= 12);
305 #if 0 /* For debugging */
306   printf("%s > %lx > %s\n", attr_name, *(uint64_t *)data, r_safe_name);
307 #endif
308 }
309 
310 /* Make attribute layout non-interleaved.
311  * Warning! This does not change data layout!
312  * Use direct buffer access to fill the data.
313  * This is for advanced usage.
314  *
315  * De-interleaved data means all attribute data for each attribute
316  * is stored continuously like this:
317  * 000011112222
318  * instead of :
319  * 012012012012
320  *
321  * Note this is per attribute de-interleaving, NOT per component.
322  *  */
GPU_vertformat_deinterleave(GPUVertFormat * format)323 void GPU_vertformat_deinterleave(GPUVertFormat *format)
324 {
325   /* Ideally we should change the stride and offset here. This would allow
326    * us to use GPU_vertbuf_attr_set / GPU_vertbuf_attr_fill. But since
327    * we use only 11 bits for attr->offset this limits the size of the
328    * buffer considerably. So instead we do the conversion when creating
329    * bindings in create_bindings(). */
330   format->deinterleaved = true;
331 }
332 
/**
 * Number of bytes to add to \a offset so that it becomes a multiple of \a alignment.
 *
 * \return 0 when \a offset is already aligned.
 */
uint padding(uint offset, uint alignment)
{
  const uint remainder = offset % alignment;
  if (remainder == 0) {
    return 0;
  }
  return alignment - remainder;
}
338 
339 #if PACK_DEBUG
show_pack(uint a_idx,uint sz,uint pad)340 static void show_pack(uint a_idx, uint sz, uint pad)
341 {
342   const char c = 'A' + a_idx;
343   for (uint i = 0; i < pad; i++) {
344     putchar('-');
345   }
346   for (uint i = 0; i < sz; i++) {
347     putchar(c);
348   }
349 }
350 #endif
351 
VertexFormat_pack(GPUVertFormat * format)352 void VertexFormat_pack(GPUVertFormat *format)
353 {
354   GPUVertAttr *a0 = &format->attrs[0];
355   a0->offset = 0;
356   uint offset = a0->sz;
357 
358 #if PACK_DEBUG
359   show_pack(0, a0->sz, 0);
360 #endif
361 
362   for (uint a_idx = 1; a_idx < format->attr_len; a_idx++) {
363     GPUVertAttr *a = &format->attrs[a_idx];
364     uint mid_padding = padding(offset, attr_align(a));
365     offset += mid_padding;
366     a->offset = offset;
367     offset += a->sz;
368 
369 #if PACK_DEBUG
370     show_pack(a_idx, a->sz, mid_padding);
371 #endif
372   }
373 
374   uint end_padding = padding(offset, attr_align(a0));
375 
376 #if PACK_DEBUG
377   show_pack(0, 0, end_padding);
378   putchar('\n');
379 #endif
380   format->stride = offset + end_padding;
381   format->packed = true;
382 }
383 
GPU_vertformat_from_shader(GPUVertFormat * format,const struct GPUShader * gpushader)384 void GPU_vertformat_from_shader(GPUVertFormat *format, const struct GPUShader *gpushader)
385 {
386   const Shader *shader = reinterpret_cast<const Shader *>(gpushader);
387   shader->vertformat_from_shader(format);
388 }
389