/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "main/glthread_marshal.h"
#include "main/dispatch.h"
#include "main/bufferobj.h"

/**
 * Create an upload buffer. This is called from the app thread, so everything
 * has to be thread-safe in the driver.
 */
static struct gl_buffer_object *
new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
{
   assert(ctx->GLThread.SupportsBufferUploads);

   struct gl_buffer_object *obj =
      _mesa_bufferobj_alloc(ctx, -1);
   if (!obj)
      return NULL;

   obj->Immutable = true;

   if (!_mesa_bufferobj_data(ctx, GL_ARRAY_BUFFER, size, NULL,
                             GL_WRITE_ONLY,
                             GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
                             obj)) {
      _mesa_delete_buffer_object(ctx, obj);
      return NULL;
   }

   *ptr = _mesa_bufferobj_map_range(ctx, 0, size,
                                    GL_MAP_WRITE_BIT |
                                    GL_MAP_UNSYNCHRONIZED_BIT |
                                    MESA_MAP_THREAD_SAFE_BIT,
                                    obj, MAP_GLTHREAD);
   if (!*ptr) {
      _mesa_delete_buffer_object(ctx, obj);
      return NULL;
   }

   return obj;
}

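/**
 * Allocate "size" bytes in the glthread upload buffer and optionally copy
 * "data" into them. On success, *out_buffer and *out_offset identify where
 * the data was placed; if "data" is NULL, *out_ptr points at the mapped
 * storage so the caller can write it directly. The returned buffer reference
 * is taken from a pool of references pre-added to RefCount (see the comment
 * below), so no atomics are needed in the common case.
 */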
void
_mesa_glthread_upload(struct gl_context *ctx, const void *data,
                      GLsizeiptr size, unsigned *out_offset,
                      struct gl_buffer_object **out_buffer,
                      uint8_t **out_ptr)
{
   struct glthread_state *glthread = &ctx->GLThread;
   const unsigned default_size = 1024 * 1024;

   if (unlikely(size > INT_MAX))
      return;

   /* The alignment was chosen arbitrarily. */
   unsigned offset = align(glthread->upload_offset, 8);

   /* Allocate a new buffer if needed. */
   if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
      /* If the size is greater than the buffer size, allocate a separate
       * buffer just for this upload.
       */
      if (unlikely(size > default_size)) {
         uint8_t *ptr;

         assert(*out_buffer == NULL);
         *out_buffer = new_upload_buffer(ctx, size, &ptr);
         if (!*out_buffer)
            return;

         *out_offset = 0;
         if (data)
            memcpy(ptr, data, size);
         else
            *out_ptr = ptr;
         return;
      }

      if (glthread->upload_buffer_private_refcount > 0) {
         p_atomic_add(&glthread->upload_buffer->RefCount,
                      -glthread->upload_buffer_private_refcount);
         glthread->upload_buffer_private_refcount = 0;
      }
      _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
      glthread->upload_buffer =
         new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
      glthread->upload_offset = 0;
      offset = 0;

      /* Since atomic operations are very slow when 2 threads are not
       * sharing the same L3 cache (which can happen on AMD Zen), prevent
       * using atomics as follows:
       *
       * This function has to return a buffer reference to the caller.
       * Instead of atomic_inc for every call, it does all possible future
       * increments in advance when the upload buffer is allocated.
       * The maximum number of times the function can be called per upload
       * buffer is default_size, because the minimum allocation size is 1.
       * Therefore the function can only return default_size references
       * at most, so we will never need more. This is the number that is
       * added to RefCount at allocation.
       *
       * upload_buffer_private_refcount tracks how many buffer references
       * are left to return to callers. If the buffer is full and there are
       * still references left, they are atomically subtracted from RefCount
       * before the buffer is unreferenced.
       *
       * This can increase performance by 20%.
       */
      glthread->upload_buffer->RefCount += default_size;
      glthread->upload_buffer_private_refcount = default_size;
   }

   /* Upload data. */
   if (data)
      memcpy(glthread->upload_ptr + offset, data, size);
   else
      *out_ptr = glthread->upload_ptr + offset;

   glthread->upload_offset = offset + size;
   *out_offset = offset;

   assert(*out_buffer == NULL);
   assert(glthread->upload_buffer_private_refcount > 0);
   *out_buffer = glthread->upload_buffer;
   glthread->upload_buffer_private_refcount--;
}

/** Tracks the current bindings for the vertex array and index array buffers.
 *
 * This is part of what we need to enable glthread on compat-GL contexts that
 * happen to use VBOs, without also supporting the full tracking of VBO vs
 * user vertex array bindings per attribute on each vertex array for
 * determining what to upload at draw call time.
 *
 * Note that GL core makes it so that a buffer binding with an invalid handle
 * in the "buffer" parameter will throw an error, and then a
 * glVertexAttribPointer() that follows might not end up pointing at a VBO.
 * However, in GL core the draw call would throw an error as well, so we don't
 * really care if our tracking is wrong for this case -- we never need to
 * marshal user data for draw calls, and the unmarshal will just generate an
 * error or not as appropriate.
 *
 * For compatibility GL, we do need to accurately know whether the draw call
 * on the unmarshal side will dereference a user pointer or load data from a
 * VBO per vertex.  That would make it seem like we need to track whether a
 * "buffer" is valid, so that we can know when an error will be generated
 * instead of updating the binding.  However, compat GL has the ridiculous
 * feature that if you pass a bad name, it just gens a buffer object for you,
 * so we escape without having to know if things are valid or not.
 */
void
_mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer)
{
   struct glthread_state *glthread = &ctx->GLThread;

   switch (target) {
   case GL_ARRAY_BUFFER:
      glthread->CurrentArrayBufferName = buffer;
      break;
   case GL_ELEMENT_ARRAY_BUFFER:
      /* The current element array buffer binding is actually tracked in the
       * vertex array object instead of the context, so this would need to
       * change on vertex array object updates.
       */
      glthread->CurrentVAO->CurrentElementBufferName = buffer;
      break;
   case GL_DRAW_INDIRECT_BUFFER:
      glthread->CurrentDrawIndirectBufferName = buffer;
      break;
   case GL_PIXEL_PACK_BUFFER:
      glthread->CurrentPixelPackBufferName = buffer;
      break;
   case GL_PIXEL_UNPACK_BUFFER:
      glthread->CurrentPixelUnpackBufferName = buffer;
      break;
   case GL_QUERY_BUFFER:
      glthread->CurrentQueryBufferName = buffer;
      break;
   }
}

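/**
 * Reset the glthread shadow bindings for any buffer that is being deleted,
 * mirroring the unbinding that the GL server performs when a currently
 * bound buffer is deleted.
 */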
void
_mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
                             const GLuint *buffers)
{
   struct glthread_state *glthread = &ctx->GLThread;

   if (!buffers || n < 0)
      return;

   for (unsigned i = 0; i < n; i++) {
      GLuint id = buffers[i];

      if (id == glthread->CurrentArrayBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_ARRAY_BUFFER, 0);
      if (id == glthread->CurrentVAO->CurrentElementBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, 0);
      if (id == glthread->CurrentDrawIndirectBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_DRAW_INDIRECT_BUFFER, 0);
      if (id == glthread->CurrentPixelPackBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_PIXEL_PACK_BUFFER, 0);
      if (id == glthread->CurrentPixelUnpackBufferName)
         _mesa_glthread_BindBuffer(ctx, GL_PIXEL_UNPACK_BUFFER, 0);
   }
}

/* BufferData: marshalled asynchronously */
struct marshal_cmd_BufferData
{
   struct marshal_cmd_base cmd_base;
   GLuint target_or_name;
   GLsizeiptr size;
   GLenum usage;
   const GLvoid *data_external_mem;
   bool data_null; /* If set, no data follows for "data" */
   bool named;
   bool ext_dsa;
   /* Next size bytes are GLubyte data[size] */
};

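/* Executed on the glthread side: replays one BufferData command, dispatching
 * to the EXT DSA, DSA, or classic entry point based on the command flags.
 */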
uint32_t
_mesa_unmarshal_BufferData(struct gl_context *ctx,
                           const struct marshal_cmd_BufferData *cmd,
                           const uint64_t *last)
{
   const GLuint target_or_name = cmd->target_or_name;
   const GLsizei size = cmd->size;
   const GLenum usage = cmd->usage;
   const void *data;

   if (cmd->data_null)
      data = NULL;
   else if (!cmd->named && target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD)
      data = cmd->data_external_mem;
   else
      data = (const void *) (cmd + 1);

   if (cmd->ext_dsa) {
      CALL_NamedBufferDataEXT(ctx->CurrentServerDispatch,
                              (target_or_name, size, data, usage));
   } else if (cmd->named) {
      CALL_NamedBufferData(ctx->CurrentServerDispatch,
                           (target_or_name, size, data, usage));
   } else {
      CALL_BufferData(ctx->CurrentServerDispatch,
                      (target_or_name, size, data, usage));
   }
   return cmd->cmd_base.cmd_size;
}

uint32_t
_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
                                const struct marshal_cmd_NamedBufferData *cmd,
                                const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}

uint32_t
_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferDataEXT *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}

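/* Common marshalling for BufferData, NamedBufferData and NamedBufferDataEXT.
 * Small uploads are copied into the command stream and replayed
 * asynchronously; invalid or oversized requests fall back to a synchronous
 * call after draining the glthread queue.
 */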
static void
_mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size,
                                const GLvoid *data, GLenum usage, bool named,
                                bool ext_dsa, const char *func)
{
   GET_CURRENT_CONTEXT(ctx);
   bool external_mem = !named &&
                       target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD;
   bool copy_data = data && !external_mem;
   size_t cmd_size = sizeof(struct marshal_cmd_BufferData) + (copy_data ? size : 0);

   if (unlikely(size < 0 || size > INT_MAX || cmd_size > MARSHAL_MAX_CMD_SIZE ||
                (named && target_or_name == 0))) {
      _mesa_glthread_finish_before(ctx, func);
      if (named) {
         CALL_NamedBufferData(ctx->CurrentServerDispatch,
                              (target_or_name, size, data, usage));
      } else {
         CALL_BufferData(ctx->CurrentServerDispatch,
                         (target_or_name, size, data, usage));
      }
      return;
   }

   struct marshal_cmd_BufferData *cmd =
      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferData,
                                      cmd_size);

   cmd->target_or_name = target_or_name;
   cmd->size = size;
   cmd->usage = usage;
   cmd->data_null = !data;
   cmd->named = named;
   cmd->ext_dsa = ext_dsa;
   cmd->data_external_mem = data;

   if (copy_data) {
      char *variable_data = (char *) (cmd + 1);
      memcpy(variable_data, data, size);
   }
}

void GLAPIENTRY
_mesa_marshal_BufferData(GLenum target, GLsizeiptr size, const GLvoid * data,
                         GLenum usage)
{
   _mesa_marshal_BufferData_merged(target, size, data, usage, false, false,
                                   "BufferData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size,
                              const GLvoid * data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, false,
                                   "NamedBufferData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferDataEXT(GLuint buffer, GLsizeiptr size,
                                 const GLvoid *data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, true,
                                   "NamedBufferDataEXT");
}


/* BufferSubData: marshalled asynchronously */
struct marshal_cmd_BufferSubData
{
   struct marshal_cmd_base cmd_base;
   GLenum target_or_name;
   GLintptr offset;
   GLsizeiptr size;
   bool named;
   bool ext_dsa;
   /* Next size bytes are GLubyte data[size] */
};

uint32_t
_mesa_unmarshal_BufferSubData(struct gl_context *ctx,
                              const struct marshal_cmd_BufferSubData *cmd,
                              const uint64_t *last)
{
   const GLenum target_or_name = cmd->target_or_name;
   const GLintptr offset = cmd->offset;
   const GLsizeiptr size = cmd->size;
   const void *data = (const void *) (cmd + 1);

   if (cmd->ext_dsa) {
      CALL_NamedBufferSubDataEXT(ctx->CurrentServerDispatch,
                                 (target_or_name, offset, size, data));
   } else if (cmd->named) {
      CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
                              (target_or_name, offset, size, data));
   } else {
      CALL_BufferSubData(ctx->CurrentServerDispatch,
                         (target_or_name, offset, size, data));
   }
   return cmd->cmd_base.cmd_size;
}

uint32_t
_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferSubData *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}

uint32_t
_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx,
                                      const struct marshal_cmd_NamedBufferSubDataEXT *cmd,
                                      const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}

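/* Common marshalling for BufferSubData, NamedBufferSubData and
 * NamedBufferSubDataEXT. When buffer uploads are supported, the data is
 * copied into a glthread upload buffer and the copy into the destination
 * buffer is done on the GPU via InternalBufferSubDataCopyMESA; otherwise the
 * data is copied into the command stream, or the call is made synchronously
 * for invalid or oversized requests.
 */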
static void
_mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
                                   GLsizeiptr size, const GLvoid *data,
                                   bool named, bool ext_dsa, const char *func)
{
   GET_CURRENT_CONTEXT(ctx);
   size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size;

   /* Fast path: Copy the data to an upload buffer, and use the GPU
    * to copy the uploaded data to the destination buffer.
    */
   /* TODO: Handle offset == 0 && size < buffer_size.
    *       If offset == 0 and size == buffer_size, it's better to discard
    *       the buffer storage, but we don't know the buffer size in glthread.
    */
   if (ctx->GLThread.SupportsBufferUploads &&
       data && offset > 0 && size > 0) {
      struct gl_buffer_object *upload_buffer = NULL;
      unsigned upload_offset = 0;

      _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
                            NULL);

      if (upload_buffer) {
         _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
                                                     upload_offset,
                                                     target_or_name,
                                                     offset, size, named,
                                                     ext_dsa);
         return;
      }
   }

   if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 ||
                cmd_size > MARSHAL_MAX_CMD_SIZE || !data ||
                (named && target_or_name == 0))) {
      _mesa_glthread_finish_before(ctx, func);
      if (named) {
         CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
                                 (target_or_name, offset, size, data));
      } else {
         CALL_BufferSubData(ctx->CurrentServerDispatch,
                            (target_or_name, offset, size, data));
      }
      return;
   }

   struct marshal_cmd_BufferSubData *cmd =
      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferSubData,
                                      cmd_size);
   cmd->target_or_name = target_or_name;
   cmd->offset = offset;
   cmd->size = size;
   cmd->named = named;
   cmd->ext_dsa = ext_dsa;

   char *variable_data = (char *) (cmd + 1);
   memcpy(variable_data, data, size);
}

void GLAPIENTRY
_mesa_marshal_BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
                            const GLvoid * data)
{
   _mesa_marshal_BufferSubData_merged(target, offset, size, data, false,
                                      false, "BufferSubData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferSubData(GLuint buffer, GLintptr offset,
                                 GLsizeiptr size, const GLvoid * data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      false, "NamedBufferSubData");
}

void GLAPIENTRY
_mesa_marshal_NamedBufferSubDataEXT(GLuint buffer, GLintptr offset,
                                    GLsizeiptr size, const GLvoid * data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      true, "NamedBufferSubDataEXT");
}