
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_surface.h"

#include "nouveau_screen.h"
#include "nouveau_context.h"
#include "nouveau_winsys.h"
#include "nouveau_fence.h"
#include "nouveau_buffer.h"
#include "nouveau_mm.h"

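/* Wrapper around pipe_transfer that tracks the optional staging area used
 * for the transfer: either a malloc'd bounce buffer (map only) or a
 * sub-allocated GART BO (bo/mm/offset).
 */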
struct nouveau_transfer {
   struct pipe_transfer base;

   uint8_t *map;
   struct nouveau_bo *bo;
   struct nouveau_mm_allocation *mm;
   uint32_t offset;
};

static void *
nouveau_user_ptr_transfer_map(struct pipe_context *pipe,
                              struct pipe_resource *resource,
                              unsigned level, unsigned usage,
                              const struct pipe_box *box,
                              struct pipe_transfer **ptransfer);

static void
nouveau_user_ptr_transfer_unmap(struct pipe_context *pipe,
                                struct pipe_transfer *transfer);

static inline struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
   return (struct nouveau_transfer *)transfer;
}

static inline bool
nouveau_buffer_malloc(struct nv04_resource *buf)
{
   if (!buf->data)
      buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
   return !!buf->data;
}

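/* Allocate storage for the buffer in the requested domain. VRAM allocations
 * fall back to GART if no VRAM sub-allocation could be obtained; a domain of
 * 0 means plain system memory (malloc).
 */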
static inline bool
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = align(buf->base.width0, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return false;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return false;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   util_range_set_empty(&buf->valid_buffer_range);

   return true;
}

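/* Schedule the sub-allocation to be freed once the given fence signals, so
 * the memory is not reused while the GPU may still be accessing it.
 */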
static inline void
release_allocation(struct nouveau_mm_allocation **mm,
                   struct nouveau_fence *fence)
{
   nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
   (*mm) = NULL;
}

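/* Drop the buffer's GPU storage (BO and sub-allocation). If the buffer is
 * still fenced, the BO is unreferenced from a fence callback instead of
 * immediately, so in-flight GPU work is not disturbed.
 */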
inline void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
      nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
      buf->bo = NULL;
   } else {
      nouveau_bo_ref(NULL, &buf->bo);
   }

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid, -(uint64_t)buf->base.width0);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_sys, -(uint64_t)buf->base.width0);

   buf->domain = 0;
}

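/* Throw away the current storage and fences and allocate fresh storage in
 * the given domain. Status bits outside REALLOC_MASK are cleared.
 */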
static inline bool
nouveau_buffer_reallocate(struct nouveau_screen *screen,
                          struct nv04_resource *buf, unsigned domain)
{
   nouveau_buffer_release_gpu_storage(buf);

   nouveau_fence_ref(NULL, &buf->fence);
   nouveau_fence_ref(NULL, &buf->fence_wr);

   buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;

   return nouveau_buffer_allocate(screen, buf, domain);
}

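/* Destroy callback for buffer resources. Releases GPU storage, the optional
 * system-memory shadow copy and any outstanding fences.
 */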
void
nouveau_buffer_destroy(struct pipe_screen *pscreen,
                       struct pipe_resource *presource)
{
   struct nv04_resource *res = nv04_resource(presource);

   if (res->status & NOUVEAU_BUFFER_STATUS_USER_PTR) {
      FREE(res);
      return;
   }

   nouveau_buffer_release_gpu_storage(res);

   if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      align_free(res->data);

   nouveau_fence_ref(NULL, &res->fence);
   nouveau_fence_ref(NULL, &res->fence_wr);

   util_range_destroy(&res->valid_buffer_range);

   FREE(res);

   NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
}

/* Set up a staging area for the transfer. This is either done in "regular"
 * system memory if the driver supports push_data (nv50+) and the data is
 * small enough (and permit_pb == true), or in GART memory.
 */
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
                         struct nouveau_transfer *tx, bool permit_pb)
{
   const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
   const unsigned size = align(tx->base.box.width, 4) + adj;

   if (!nv->push_data)
      permit_pb = false;

   if ((size <= nv->screen->transfer_pushbuf_threshold) && permit_pb) {
      tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
      if (tx->map)
         tx->map += adj;
   } else {
      tx->mm =
         nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
      if (tx->bo) {
         tx->offset += adj;
         if (!nouveau_bo_map(tx->bo, 0, NULL))
            tx->map = (uint8_t *)tx->bo->map + tx->offset;
      }
   }
   return tx->map;
}

/* Copies data from the resource into the transfer's temporary GART
 * buffer. Also updates buf->data if present.
 *
 * Maybe just migrate to GART right away if we actually need to do this. */
static bool
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
      return false;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return true;
}

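/* Push the given sub-range of the transfer's staging data back into the
 * resource, either with a GPU copy from the staging BO, through the push_cb
 * hook when the range is dword-aligned, or via push_data as a fallback.
 * The buffer is fenced so later maps synchronize against this write.
 */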
static void
nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
                       unsigned offset, unsigned size)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   uint8_t *data = tx->map + offset;
   const unsigned base = tx->base.box.x + offset;
   const bool can_cb = !((base | size) & 3);

   if (buf->data)
      memcpy(data, buf->data + base, size);
   else
      buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);

   if (tx->bo)
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
   if (nv->push_cb && can_cb)
      nv->push_cb(nv, buf,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);

   nouveau_fence_ref(nv->screen->fence.current, &buf->fence);
   nouveau_fence_ref(nv->screen->fence.current, &buf->fence_wr);
}

/* Does a CPU wait for the buffer's backing data to become reliably accessible
 * for write/read by waiting on the buffer's relevant fences.
 */
static inline bool
nouveau_buffer_sync(struct nouveau_context *nv,
                    struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_MAP_READ) {
      if (!buf->fence_wr)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence_wr));
      if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
         return false;
   } else {
      if (!buf->fence)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence));
      if (!nouveau_fence_wait(buf->fence, &nv->debug))
         return false;

      nouveau_fence_ref(NULL, &buf->fence);
   }
   nouveau_fence_ref(NULL, &buf->fence_wr);

   return true;
}

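/* Check whether the GPU may still be accessing the buffer: a pending write
 * fence makes it busy for reads, any pending fence makes it busy for writes.
 */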
static inline bool
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_MAP_READ)
      return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
   else
      return (buf->fence && !nouveau_fence_signalled(buf->fence));
}

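/* Fill in the pipe_transfer fields for a 1D buffer mapping; no staging
 * resources are attached yet.
 */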
static inline void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
                             struct pipe_resource *resource,
                             const struct pipe_box *box,
                             unsigned usage)
{
   tx->base.resource = resource;
   tx->base.level = 0;
   tx->base.usage = usage;
   tx->base.box.x = box->x;
   tx->base.box.y = 0;
   tx->base.box.z = 0;
   tx->base.box.width = box->width;
   tx->base.box.height = 1;
   tx->base.box.depth = 1;
   tx->base.stride = 0;
   tx->base.layer_stride = 0;

   tx->bo = NULL;
   tx->map = NULL;
}

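/* Release the transfer's staging resources. A staging BO is unreferenced
 * (and its sub-allocation freed) only after the current fence signals; a
 * malloc'd bounce buffer is freed immediately.
 */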
static inline void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
                            struct nouveau_transfer *tx)
{
   if (tx->map) {
      if (likely(tx->bo)) {
         nouveau_fence_work(nv->screen->fence.current,
                            nouveau_fence_unref_bo, tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->screen->fence.current);
      } else {
         align_free(tx->map -
                    (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
      }
   }
}

/* Creates a cache in system memory of the buffer data. */
static bool
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
   struct nouveau_transfer tx;
   bool ret;
   tx.base.resource = &buf->base;
   tx.base.box.x = 0;
   tx.base.box.width = buf->base.width0;
   tx.bo = NULL;
   tx.map = NULL;

   if (!buf->data)
      if (!nouveau_buffer_malloc(buf))
         return false;
   if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
      return true;
   nv->stats.buf_cache_count++;

   if (!nouveau_transfer_staging(nv, &tx, false))
      return false;

   ret = nouveau_transfer_read(nv, &tx);
   if (ret) {
      buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
      memcpy(buf->data, tx.map, buf->base.width0);
   }
   nouveau_buffer_transfer_del(nv, &tx);
   return ret;
}


#define NOUVEAU_TRANSFER_DISCARD \
   (PIPE_MAP_DISCARD_RANGE | PIPE_MAP_DISCARD_WHOLE_RESOURCE)

/* Checks whether it is possible to completely discard the memory backing this
 * resource. This can be useful if we would otherwise have to wait for a read
 * operation to complete on this data.
 */
static inline bool
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
   if (!(usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE))
      return false;
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return false;
   if (unlikely(usage & PIPE_MAP_PERSISTENT))
      return false;
   return buf->mm && nouveau_buffer_busy(buf, PIPE_MAP_WRITE);
}

/* Returns a pointer to a memory area representing a window into the
 * resource's data.
 *
 * This may or may not be the _actual_ memory area of the resource. However
 * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
 * area, the contents of the returned map are copied over to the resource.
 *
 * The usage indicates what the caller plans to do with the map:
 *
 *   WRITE means that the user plans to write to it
 *
 *   READ means that the user plans on reading from it
 *
 *   DISCARD_WHOLE_RESOURCE means that the whole resource is going to be
 *   potentially overwritten, and even the parts that are not overwritten do
 *   not need to be preserved.
 *
 *   DISCARD_RANGE means that all the data in the specified range is going to
 *   be overwritten.
 *
 * The strategy for determining what kind of memory area to return is complex,
 * see comments inside of the function.
 */
void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);

   if (buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR)
      return nouveau_user_ptr_transfer_map(pipe, resource, level, usage, box, ptransfer);

   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_MAP_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_MAP_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   /* If we are trying to write to an uninitialized range, the user shouldn't
    * care what was there before. So we can treat the write as if the target
    * range were being discarded. Furthermore, since we know that even if this
    * buffer is busy due to GPU activity, because the contents were
    * uninitialized, the GPU can't care what was there, and so we can treat
    * the write as being unsynchronized.
    */
   if ((usage & PIPE_MAP_WRITE) &&
       !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width))
      usage |= PIPE_MAP_DISCARD_RANGE | PIPE_MAP_UNSYNCHRONIZED;

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         /* Set up a staging area for the user to write to. It will be copied
          * back into VRAM on unmap. */
         if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, true);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            /* The GPU is currently writing to this buffer. Copy its current
             * contents to a staging area in the GART. This is necessary since
             * not the whole area being mapped is being discarded.
             */
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, false);
            nouveau_transfer_read(nv, tx);
         } else {
            /* The buffer is currently idle. Create a staging area for writes,
             * and make sure that the cached data is up-to-date. */
            if (usage & PIPE_MAP_WRITE)
               nouveau_transfer_staging(nv, tx, true);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   /* At this point, buf->domain == GART */

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    * relevant flags. If buf->mm is set, that means this resource is part of a
    * larger slab bo that holds multiple resources. So in that case, don't
    * wait on the whole slab and instead use the logic below to return a
    * reasonable buffer for that case.
    */
   ret = nouveau_bo_map(buf->bo,
                        buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                        nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_MAP_UNSYNCHRONIZED) || !buf->mm)
      return map;

   /* If the GPU is currently reading/writing this buffer, we shouldn't
    * interfere with its progress. So instead we either wait for the GPU to
    * complete its operation, or set up a staging area to perform our work in.
    */
   if (nouveau_buffer_busy(buf, usage & PIPE_MAP_READ_WRITE)) {
      if (unlikely(usage & (PIPE_MAP_DISCARD_WHOLE_RESOURCE |
                            PIPE_MAP_PERSISTENT))) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(nv, buf, usage & PIPE_MAP_READ_WRITE);
      } else
      if (usage & PIPE_MAP_DISCARD_RANGE) {
         /* The whole range is being discarded, so it doesn't matter what was
          * there before. No need to copy anything over. */
         nouveau_transfer_staging(nv, tx, true);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_MAP_READ)) {
         if (usage & PIPE_MAP_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(nv, buf, usage & PIPE_MAP_READ_WRITE);
      } else {
         /* It is expected that the returned buffer be a representation of the
          * data in question, so we must copy it over from the buffer. */
         nouveau_transfer_staging(nv, tx, true);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}



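/* Flush an explicitly announced sub-range of a mapped transfer: push the
 * range from the staging area (if any) into the resource and grow the valid
 * buffer range accordingly.
 */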
void
nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
                                     struct pipe_transfer *transfer,
                                     const struct pipe_box *box)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->map)
      nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);

   util_range_add(&buf->base, &buf->valid_buffer_range,
                  tx->base.box.x + box->x,
                  tx->base.box.x + box->x + box->width);
}

/* Unmap stage of the transfer. If it was a WRITE transfer and the map that
 * was returned was not the real resource's data, this needs to transfer the
 * data back to the resource.
 *
 * Also marks vbo dirty based on the buffer's binding
 */
void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
                              struct pipe_transfer *transfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR)
      return nouveau_user_ptr_transfer_unmap(pipe, transfer);

   struct nouveau_transfer *tx = nouveau_transfer(transfer);

   if (tx->base.usage & PIPE_MAP_WRITE) {
      if (!(tx->base.usage & PIPE_MAP_FLUSH_EXPLICIT)) {
         if (tx->map)
            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);

         util_range_add(&buf->base, &buf->valid_buffer_range,
                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
      }

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
         /* make sure we invalidate dedicated caches */
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = true;
      }
   }

   if (!tx->bo && (tx->base.usage & PIPE_MAP_WRITE))
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);

   nouveau_buffer_transfer_del(nv, tx);
   FREE(tx);
}


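/* Copy a range between two buffers. Uses the GPU copy engine when both
 * buffers have GPU storage; otherwise falls back to a CPU copy through
 * util_resource_copy_region. The destination's valid range is updated.
 */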
void
nouveau_copy_buffer(struct nouveau_context *nv,
                    struct nv04_resource *dst, unsigned dstx,
                    struct nv04_resource *src, unsigned srcx, unsigned size)
{
   assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER);

   assert(!(dst->status & NOUVEAU_BUFFER_STATUS_USER_PTR));
   assert(!(src->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   if (likely(dst->domain) && likely(src->domain)) {
      nv->copy_data(nv,
                    dst->bo, dst->offset + dstx, dst->domain,
                    src->bo, src->offset + srcx, src->domain, size);

      dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence);
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence_wr);

      src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
      nouveau_fence_ref(nv->screen->fence.current, &src->fence);
   } else {
      struct pipe_box src_box;
      src_box.x = srcx;
      src_box.y = 0;
      src_box.z = 0;
      src_box.width = size;
      src_box.height = 1;
      src_box.depth = 1;
      util_resource_copy_region(&nv->pipe,
                                &dst->base, 0, dstx, 0, 0,
                                &src->base, 0, &src_box);
   }

   util_range_add(&dst->base, &dst->valid_buffer_range, dstx, dstx + size);
}


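/* Return a CPU pointer to the resource's data at the given offset, without
 * going through the transfer machinery. VRAM-resident buffers are read back
 * into the system-memory cache first; GART buffers are mapped directly,
 * synchronizing against outstanding fences for sub-allocated BOs.
 */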
void *
nouveau_resource_map_offset(struct nouveau_context *nv,
                            struct nv04_resource *res, uint32_t offset,
                            uint32_t flags)
{
   if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) ||
       unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_PTR))
      return res->data + offset;

   if (res->domain == NOUVEAU_BO_VRAM) {
      if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
         nouveau_buffer_cache(nv, res);
   }
   if (res->domain != NOUVEAU_BO_GART)
      return res->data + offset;

   if (res->mm) {
      unsigned rw;
      rw = (flags & NOUVEAU_BO_WR) ? PIPE_MAP_WRITE : PIPE_MAP_READ;
      nouveau_buffer_sync(nv, res, rw);
      if (nouveau_bo_map(res->bo, 0, NULL))
         return NULL;
   } else {
      if (nouveau_bo_map(res->bo, flags, nv->client))
         return NULL;
   }
   return (uint8_t *)res->bo->map + res->offset + offset;
}

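/* Transfer map/unmap for buffers created from a user pointer: the user's
 * memory is returned directly, so only a pipe_transfer wrapper is needed.
 */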
static void *
nouveau_user_ptr_transfer_map(struct pipe_context *pipe,
                              struct pipe_resource *resource,
                              unsigned level, unsigned usage,
                              const struct pipe_box *box,
                              struct pipe_transfer **ptransfer)
{
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;
   return nv04_resource(resource)->data;
}

static void
nouveau_user_ptr_transfer_unmap(struct pipe_context *pipe,
                                struct pipe_transfer *transfer)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   FREE(tx);
}

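/* Create a buffer resource. The placement (VRAM, GART or system memory) is
 * chosen from the bind flags, usage hint and the screen's supported
 * bindings; persistent/coherent mappings force GART.
 */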
struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   bool ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                             PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
      buffer->domain = NOUVEAU_BO_GART;
   } else if (buffer->base.bind == 0 || (buffer->base.bind &
              (screen->vidmem_bindings & screen->sysmem_bindings))) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NV_VRAM_DOMAIN(screen);
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }

   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);

   if (ret == false)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   util_range_init(&buffer->valid_buffer_range);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}

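/* Create a buffer resource that wraps an application-provided pointer
 * (NOUVEAU_BUFFER_STATUS_USER_PTR); no GPU storage is allocated here.
 */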
struct pipe_resource *
nouveau_buffer_create_from_user(struct pipe_screen *pscreen,
                                const struct pipe_resource *templ,
                                void *user_ptr)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   /* set address and data to the same thing for higher compatibility with
    * existing code. It's correct nonetheless as the same pointer is equally
    * valid on the CPU and the GPU.
    */
   buffer->address = (uintptr_t)user_ptr;
   buffer->data = user_ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_PTR;
   buffer->base.screen = pscreen;

   pipe_reference_init(&buffer->base.reference, 1);

   return &buffer->base;
}

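/* Wrap user memory in a buffer resource (NOUVEAU_BUFFER_STATUS_USER_MEMORY).
 * The data stays in the user's allocation until it is migrated or uploaded.
 */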
struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
                           unsigned bytes, unsigned bind)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;
   buffer->base.format = PIPE_FORMAT_R8_UNORM;
   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
   buffer->base.bind = bind;
   buffer->base.width0 = bytes;
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;

   util_range_init(&buffer->valid_buffer_range);
   util_range_add(&buffer->base, &buffer->valid_buffer_range, 0, bytes);

   return &buffer->base;
}

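/* Copy the buffer contents from the given BO into the buffer's
 * system-memory shadow (allocating it if necessary).
 */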
static inline bool
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
                          struct nouveau_bo *bo, unsigned offset, unsigned size)
{
   if (!nouveau_buffer_malloc(buf))
      return false;
   if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
      return false;
   memcpy(buf->data, (uint8_t *)bo->map + offset, size);
   return true;
}

/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
bool
nouveau_buffer_migrate(struct nouveau_context *nv,
                       struct nv04_resource *buf, const unsigned new_domain)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   struct nouveau_screen *screen = nv->screen;
   struct nouveau_bo *bo;
   const unsigned old_domain = buf->domain;
   unsigned size = buf->base.width0;
   unsigned offset;
   int ret;

   assert(new_domain != old_domain);

   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
      if (!nouveau_buffer_allocate(screen, buf, new_domain))
         return false;
      ret = nouveau_bo_map(buf->bo, 0, nv->client);
      if (ret)
         return false;
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      align_free(buf->data);
   } else
   if (old_domain != 0 && new_domain != 0) {
      struct nouveau_mm_allocation *mm = buf->mm;

      if (new_domain == NOUVEAU_BO_VRAM) {
         /* keep a system memory copy of our data in case we hit a fallback */
         if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
            return false;
         if (nouveau_mesa_debug)
            debug_printf("migrating %u KiB to VRAM\n", size / 1024);
      }

      offset = buf->offset;
      bo = buf->bo;
      buf->bo = NULL;
      buf->mm = NULL;
      nouveau_buffer_allocate(screen, buf, new_domain);

      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

      nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
      if (mm)
         release_allocation(&mm, screen->fence.current);
   } else
   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
      struct nouveau_transfer tx;
      if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
         return false;
      tx.base.resource = &buf->base;
      tx.base.box.x = 0;
      tx.base.box.width = buf->base.width0;
      tx.bo = NULL;
      tx.map = NULL;
      if (!nouveau_transfer_staging(nv, &tx, false))
         return false;
      nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
      nouveau_buffer_transfer_del(nv, &tx);
   } else
      return false;

   assert(buf->domain == new_domain);
   return true;
}

/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * We'd like to only allocate @size bytes here, but then we'd have to rebase
 * the vertex indices ...
 */
bool
nouveau_user_buffer_upload(struct nouveau_context *nv,
                           struct nv04_resource *buf,
                           unsigned base, unsigned size)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
   int ret;

   assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);

   buf->base.width0 = base + size;
   if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
      return false;

   ret = nouveau_bo_map(buf->bo, 0, nv->client);
   if (ret)
      return false;
   memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);

   return true;
}

/* Invalidate underlying buffer storage, reset fences, reallocate to non-busy
 * buffer.
 */
void
nouveau_buffer_invalidate(struct pipe_context *pipe,
                          struct pipe_resource *resource)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   int ref = buf->base.reference.count - 1;

   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   /* Shared buffers shouldn't get reallocated */
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return;

   /* If the buffer is sub-allocated and not currently being written, just
    * wipe the valid buffer range. Otherwise we have to create fresh
    * storage. (We don't keep track of fences for non-sub-allocated BO's.)
    */
   if (buf->mm && !nouveau_buffer_busy(buf, PIPE_MAP_WRITE)) {
      util_range_set_empty(&buf->valid_buffer_range);
   } else {
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }
}


/* Scratch data allocation. */

static inline int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                         unsigned size)
{
   return nouveau_bo_new(nv->screen->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
                         4096, size, NULL, pbo);
}

static void
nouveau_scratch_unref_bos(void *d)
{
   struct runout *b = d;
   int i;

   for (i = 0; i < b->nr; ++i)
      nouveau_bo_ref(NULL, &b->bo[i]);

   FREE(b);
}

void
nouveau_scratch_runout_release(struct nouveau_context *nv)
{
   if (!nv->scratch.runout)
      return;

   if (!nouveau_fence_work(nv->screen->fence.current, nouveau_scratch_unref_bos,
         nv->scratch.runout))
      return;

   nv->scratch.end = 0;
   nv->scratch.runout = NULL;
}

/* Allocate an extra bo if we can't fit everything we need simultaneously.
 * (Could happen for very large user arrays.)
 */
static inline bool
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
   int ret;
   unsigned n;

   if (nv->scratch.runout)
      n = nv->scratch.runout->nr;
   else
      n = 0;
   nv->scratch.runout = REALLOC(nv->scratch.runout, n == 0 ? 0 :
                                (sizeof(*nv->scratch.runout) + (n + 0) * sizeof(void *)),
                                 sizeof(*nv->scratch.runout) + (n + 1) * sizeof(void *));
   nv->scratch.runout->nr = n + 1;
   nv->scratch.runout->bo[n] = NULL;

   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout->bo[n], size);
   if (!ret) {
      ret = nouveau_bo_map(nv->scratch.runout->bo[n], 0, NULL);
      if (ret)
         nouveau_bo_ref(NULL, &nv->scratch.runout->bo[--nv->scratch.runout->nr]);
   }
   if (!ret) {
      nv->scratch.current = nv->scratch.runout->bo[n];
      nv->scratch.offset = 0;
      nv->scratch.end = size;
      nv->scratch.map = nv->scratch.current->map;
   }
   return !ret;
}

/* Continue to next scratch buffer, if available (no wrapping, large enough).
 * Allocate it if it has not yet been created.
 */
static inline bool
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
   struct nouveau_bo *bo;
   int ret;
   const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;

   if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
      return false;
   nv->scratch.id = i;

   bo = nv->scratch.bo[i];
   if (!bo) {
      ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
      if (ret)
         return false;
      nv->scratch.bo[i] = bo;
   }
   nv->scratch.current = bo;
   nv->scratch.offset = 0;
   nv->scratch.end = nv->scratch.bo_size;

   ret = nouveau_bo_map(bo, NOUVEAU_BO_WR, nv->client);
   if (!ret)
      nv->scratch.map = bo->map;
   return !ret;
}

static bool
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
   bool ret;

   ret = nouveau_scratch_next(nv, min_size);
   if (!ret)
      ret = nouveau_scratch_runout(nv, min_size);
   return ret;
}


/* Copy data to a scratch buffer and return address & bo the data resides in. */
uint64_t
nouveau_scratch_data(struct nouveau_context *nv,
                     const void *data, unsigned base, unsigned size,
                     struct nouveau_bo **bo)
{
   unsigned bgn = MAX2(base, nv->scratch.offset);
   unsigned end = bgn + size;

   if (end >= nv->scratch.end) {
      end = base + size;
      if (!nouveau_scratch_more(nv, end))
         return 0;
      bgn = base;
   }
   nv->scratch.offset = align(end, 4);

   memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size);

   *bo = nv->scratch.current;
   return (*bo)->offset + (bgn - base);
}

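/* Reserve size bytes in the current scratch buffer and return a CPU pointer
 * to the reserved space, along with its GPU address and backing bo.
 */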
void *
nouveau_scratch_get(struct nouveau_context *nv,
                    unsigned size, uint64_t *gpu_addr, struct nouveau_bo **pbo)
{
   unsigned bgn = nv->scratch.offset;
   unsigned end = nv->scratch.offset + size;

   if (end >= nv->scratch.end) {
      end = size;
      if (!nouveau_scratch_more(nv, end))
         return NULL;
      bgn = 0;
   }
   nv->scratch.offset = align(end, 4);

   *pbo = nv->scratch.current;
   *gpu_addr = nv->scratch.current->offset + bgn;
   return nv->scratch.map + bgn;
}