/*
 * Copyright 2012 Francisco Jerez
 * Copyright 2015 Samuel Pitoiset
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/format/u_format.h"
#include "nv50/nv50_context.h"
#include "nv50/nv50_compute.xml.h"

#include "codegen/nv50_ir_driver.h"

int
nv50_screen_compute_setup(struct nv50_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
   unsigned obj_class;
   int i, ret;

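   /* Pick the compute object class: the nva3/nva5/nva8 members of the NVA0
    * family expose the newer NVA3 compute class, while everything else in
    * the G80 family uses the base NV50 class. */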
   switch (dev->chipset & 0xf0) {
   case 0x50:
   case 0x80:
   case 0x90:
      obj_class = NV50_COMPUTE_CLASS;
      break;
   case 0xa0:
      switch (dev->chipset) {
      case 0xa3:
      case 0xa5:
      case 0xa8:
         obj_class = NVA3_COMPUTE_CLASS;
         break;
      default:
         obj_class = NV50_COMPUTE_CLASS;
         break;
      }
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

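   /* Create the compute engine object; 0xbeef50c0 is an arbitrary handle
    * chosen by the driver. */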
   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret)
      return ret;

   BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->handle);

   BEGIN_NV04(push, NV50_CP(UNK02A0), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(DMA_STACK), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1);
   PUSH_DATA (push, 4);

   BEGIN_NV04(push, NV50_CP(UNK0290), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(REG_MODE), 1);
   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_CP(UNK0384), 1);
   PUSH_DATA (push, 0x100);
   BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1);
   PUSH_DATA (push, fifo->vram);

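   /* Clear global (surface) slots 0..14; they get rebound during surface
    * validation. Slot 15 below is left as a linear mapping with a limit of
    * ~0, presumably a window covering the whole address space. */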
   for (i = 0; i < 15; i++) {
      BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2);
      PUSH_DATA (push, 0);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1);
      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
   }

   BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1);
   PUSH_DATA (push, ~0);
   BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1);
   PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);

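   /* Warp allocation limits: the *_LOG_ALLOC values appear to be log2, so 7
    * allows up to 128 warps of local and stack space, with clamping
    * disabled. */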
   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1);
   PUSH_DATA (push, 0x54);
   BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

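   /* The TIC (texture image control) and TSC (sampler control) tables share
    * the txc buffer; the TSC entries start 64 KiB into it. */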
   BEGIN_NV04(push, NV50_CP(DMA_TIC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_CP(DMA_TSC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1);
   PUSH_DATA (push, fifo->vram);

   BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
   PUSH_DATA (push, screen->tls_bo->offset + 65536);
   BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));

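   /* Bind the driver constant buffer for compute (NV50_CB_PCP), which lives
    * at offset 3 << 16 inside the uniforms BO. */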
   BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
   PUSH_DATA (push, (NV50_CB_PCP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_CP(QUERY_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->fence.bo->offset + 16);
   PUSH_DATA (push, screen->fence.bo->offset + 16);

   return 0;
}

static void
nv50_compute_validate_samplers(struct nv50_context *nv50)
{
   bool need_flush = nv50_validate_tsc(nv50, NV50_SHADER_STAGE_COMPUTE);
   if (need_flush) {
      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TSC_FLUSH), 1);
      PUSH_DATA (nv50->base.pushbuf, 0);
   }

   /* Invalidate all 3D samplers because they are aliased. */
   nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
}

static void
nv50_compute_validate_textures(struct nv50_context *nv50)
{
   bool need_flush = nv50_validate_tic(nv50, NV50_SHADER_STAGE_COMPUTE);
   if (need_flush) {
      BEGIN_NV04(nv50->base.pushbuf, NV50_CP(TIC_FLUSH), 1);
      PUSH_DATA (nv50->base.pushbuf, 0);
   }

   /* Invalidate all 3D textures because they are aliased. */
   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
   nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
}

static inline void
nv50_compute_invalidate_constbufs(struct nv50_context *nv50)
{
   int s;

   /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
   for (s = 0; s < NV50_MAX_3D_SHADER_STAGES; s++) {
      nv50->constbuf_dirty[s] |= nv50->constbuf_valid[s];
      nv50->state.uniform_buffer_bound[s] = false;
   }
   nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
}

static void
nv50_compute_validate_constbufs(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   const int s = NV50_SHADER_STAGE_COMPUTE;

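   /* Walk the dirty-slot bitmask, lowest set bit first, and rebind each
    * constbuf slot. */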
   while (nv50->constbuf_dirty[s]) {
      int i = ffs(nv50->constbuf_dirty[s]) - 1;
      nv50->constbuf_dirty[s] &= ~(1 << i);

      if (nv50->constbuf[s][i].user) {
         const unsigned b = NV50_CB_PVP + s;
         unsigned start = 0;
         unsigned words = nv50->constbuf[s][0].size / 4;
         if (i) {
            NOUVEAU_ERR("user constbufs only supported in slot 0\n");
            continue;
         }
         if (!nv50->state.uniform_buffer_bound[s]) {
            nv50->state.uniform_buffer_bound[s] = true;
            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
            PUSH_DATA (push, (b << 12) | (i << 8) | 1);
         }
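         /* Upload the user uniforms inline, in chunks no larger than the
          * maximum FIFO packet length. */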
         while (words) {
            unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);

            PUSH_SPACE(push, nr + 3);
            BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
            PUSH_DATA (push, (start << 8) | b);
            BEGIN_NI04(push, NV50_CP(CB_DATA(0)), nr);
            PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);

            start += nr;
            words -= nr;
         }
      } else {
         struct nv04_resource *res =
            nv04_resource(nv50->constbuf[s][i].u.buf);
         if (res) {
            /* TODO: allocate persistent bindings */
            const unsigned b = s * 16 + i;

            assert(nouveau_resource_mapped_by_gpu(&res->base));

            BEGIN_NV04(push, NV50_CP(CB_DEF_ADDRESS_HIGH), 3);
            PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
            PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
            PUSH_DATA (push, (b << 16) |
                       (nv50->constbuf[s][i].size & 0xffff));
            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
            PUSH_DATA (push, (b << 12) | (i << 8) | 1);

            BCTX_REFN(nv50->bufctx_cp, CP_CB(i), res, RD);

            nv50->cb_dirty = 1; /* Force cache flush for UBO. */
            res->cb_bindings[s] |= 1 << i;
         } else {
            BEGIN_NV04(push, NV50_CP(SET_PROGRAM_CB), 1);
            PUSH_DATA (push, (i << 8) | 0);
         }
         if (i == 0)
            nv50->state.uniform_buffer_bound[s] = false;
      }
   }

   // TODO: Check if having orthogonal slots means the two don't trample over
   // each other.
   nv50_compute_invalidate_constbufs(nv50);
}
static void
nv50_get_surface_dims(const struct pipe_image_view *view,
                      int *width, int *height, int *depth)
{
   struct nv04_resource *res = nv04_resource(view->resource);
   int level;

   *width = *height = *depth = 1;
   if (res->base.target == PIPE_BUFFER) {
      *width = view->u.buf.size / util_format_get_blocksize(view->format);
      return;
   }

   level = view->u.tex.level;
   *width = u_minify(view->resource->width0, level);
   *height = u_minify(view->resource->height0, level);
   *depth = u_minify(view->resource->depth0, level);

   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
      break;
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_3D:
      break;
   default:
      assert(!"unexpected texture target");
      break;
   }
}

static void
nv50_mark_image_range_valid(const struct pipe_image_view *view)
{
   struct nv04_resource *res = (struct nv04_resource *)view->resource;

   assert(view->resource->target == PIPE_BUFFER);

   util_range_add(&res->base, &res->valid_buffer_range,
                  view->u.buf.offset,
                  view->u.buf.offset + view->u.buf.size);
}

static inline void
nv50_set_surface_info(struct nouveau_pushbuf *push,
                      const struct pipe_image_view *view,
                      int width, int height, int depth)
{
   struct nv04_resource *res;
   uint32_t *const info = push->cur;

   push->cur += 12;

   /* Make sure to always initialize the surface information area because it's
    * used to check if the given image is bound or not. */
   memset(info, 0, 12 * sizeof(*info));

   if (!view || !view->resource)
      return;
   res = nv04_resource(view->resource);

   /* Store the image dimensions for the imageSize() builtin. */
   info[0] = width;
   info[1] = height;
   info[2] = depth;

   /* Store the block size (i.e. the number of bytes per pixel), used to
    * compute pixel offsets and to detect format mismatches. */
   info[3] = util_format_get_blocksize(view->format);

   if (res->base.target != PIPE_BUFFER) {
      struct nv50_miptree *mt = nv50_miptree(&res->base);
      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
      unsigned nby = align(util_format_get_nblocksy(view->format, height),
                           NV50_TILE_SIZE_Y(lvl->tile_mode));

      if (mt->layout_3d) {
         info[4] = nby;
         info[11] = view->u.tex.first_layer;
      } else {
         info[4] = mt->layer_stride / lvl->pitch;
      }
      info[6] = mt->ms_x;
      info[7] = mt->ms_y;
      info[8] = NV50_TILE_SHIFT_X(lvl->tile_mode);
      info[9] = NV50_TILE_SHIFT_Y(lvl->tile_mode);
      info[10] = NV50_TILE_SHIFT_Z(lvl->tile_mode);
   }
}

static void
nv50_compute_validate_surfaces(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   int i;

   for (i = 0; i < NV50_MAX_GLOBALS - 1; i++) {
      struct nv50_gmem_state *gmem = &nv50->compprog->cp.gmem[i];
      int width, height, depth;
      uint64_t address = 0;

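      /* Each global slot takes a 5-word descriptor: address high/low, what
       * is presumably the pitch, the size limit and the addressing mode. */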
      BEGIN_NV04(push, NV50_CP(GLOBAL(i)), 5);

      if (gmem->valid && !gmem->image && nv50->buffers[gmem->slot].buffer) {
         struct pipe_shader_buffer *buffer = &nv50->buffers[gmem->slot];
         struct nv04_resource *res = nv04_resource(buffer->buffer);
         PUSH_DATAh(push, res->address + buffer->buffer_offset);
         PUSH_DATA (push, res->address + buffer->buffer_offset);
         PUSH_DATA (push, 0); /* pitch? */
         PUSH_DATA (push, ALIGN(buffer->buffer_size, 256) - 1);
         PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
         BCTX_REFN(nv50->bufctx_cp, CP_BUF, res, RDWR);
         util_range_add(&res->base, &res->valid_buffer_range,
                        buffer->buffer_offset,
                        buffer->buffer_offset +
                        buffer->buffer_size);

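         /* Publish the buffer size through the auxiliary constant buffer,
          * presumably so shaders can bounds-check their accesses. */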
         PUSH_SPACE(push, 1 + 3);
         BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
         PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
         BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 1);
         PUSH_DATA (push, buffer->buffer_size);
      } else if (gmem->valid && gmem->image && nv50->images[gmem->slot].resource) {
         struct pipe_image_view *view = &nv50->images[gmem->slot];
         struct nv04_resource *res = nv04_resource(view->resource);

         /* get surface dimensions based on the target. */
         nv50_get_surface_dims(view, &width, &height, &depth);

         address = res->address;
         if (res->base.target == PIPE_BUFFER) {
            address += view->u.buf.offset;
            assert(!(address & 0xff));

            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
               nv50_mark_image_range_valid(view);

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            PUSH_DATA (push, 0); /* pitch? */
            PUSH_DATA (push, ALIGN(view->u.buf.size, 0x100) - 1);
            PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
         } else {
            struct nv50_miptree *mt = nv50_miptree(view->resource);
            struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
            const unsigned z = view->u.tex.first_layer;
            unsigned max_size;

            if (mt->layout_3d) {
               address += nv50_mt_zslice_offset(mt, view->u.tex.level, 0);
               max_size = mt->total_size;
            } else {
               address += mt->layer_stride * z;
               max_size = mt->layer_stride * (view->u.tex.last_layer - view->u.tex.first_layer + 1);
            }
            address += lvl->offset;

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            if (mt->layout_3d) {
               // We have to adjust the size of the 3d surface to be
               // accessible within 2d limits. The size of each z tile goes
               // into the x direction, while the number of z tiles goes into
               // the y direction.
               const unsigned nby = util_format_get_nblocksy(view->format, height);
               const unsigned tsy = NV50_TILE_SIZE_Y(lvl->tile_mode);
               const unsigned tsz = NV50_TILE_SIZE_Z(lvl->tile_mode);
               const unsigned pitch = lvl->pitch * tsz;
               const unsigned maxy = align(nby, tsy) * align(depth, tsz) >> NV50_TILE_SHIFT_Z(lvl->tile_mode);
               PUSH_DATA (push, pitch * tsy);
               PUSH_DATA (push, (maxy - 1) << 16 | (pitch - 1));
               PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4);
            } else if (nouveau_bo_memtype(res->bo)) {
               PUSH_DATA (push, lvl->pitch * NV50_TILE_SIZE_Y(lvl->tile_mode));
               PUSH_DATA (push, (max_size / lvl->pitch - 1) << 16 | (lvl->pitch - 1));
               PUSH_DATA (push, (lvl->tile_mode & 0xff) << 4);
            } else {
               PUSH_DATA (push, lvl->pitch);
               PUSH_DATA (push, align(lvl->pitch * height, 0x100) - 1);
               PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
            }
         }

         BCTX_REFN(nv50->bufctx_cp, CP_SUF, res, RDWR);

         PUSH_SPACE(push, 12 + 3);
         BEGIN_NV04(push, NV50_CP(CB_ADDR), 1);
         PUSH_DATA (push, NV50_CB_AUX_BUF_INFO(i) << (8 - 2) | NV50_CB_AUX);
         BEGIN_NI04(push, NV50_CP(CB_DATA(0)), 12);
         nv50_set_surface_info(push, view, width, height, depth);
      } else {
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
      }
   }
}

static void
nv50_compute_validate_globals(struct nv50_context *nv50)
{
   unsigned i;

   for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *);
        ++i) {
      struct pipe_resource *res = *util_dynarray_element(
         &nv50->global_residents, struct pipe_resource *, i);
      if (res)
         nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL,
                                  nv04_resource(res), NOUVEAU_BO_RDWR);
   }
}

static struct nv50_state_validate
validate_list_cp[] = {
   { nv50_compprog_validate,              NV50_NEW_CP_PROGRAM     },
   { nv50_compute_validate_constbufs,     NV50_NEW_CP_CONSTBUF    },
   { nv50_compute_validate_surfaces,      NV50_NEW_CP_SURFACES |
                                          NV50_NEW_CP_BUFFERS  |
                                          NV50_NEW_CP_PROGRAM     },
   { nv50_compute_validate_textures,      NV50_NEW_CP_TEXTURES    },
   { nv50_compute_validate_samplers,      NV50_NEW_CP_SAMPLERS    },
   { nv50_compute_validate_globals,       NV50_NEW_CP_GLOBALS     },
};

static bool
nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask)
{
   bool ret;

   /* TODO: validate textures, samplers, surfaces */
   ret = nv50_state_validate(nv50, mask, validate_list_cp,
                             ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp,
                             nv50->bufctx_cp);

   if (unlikely(nv50->state.flushed))
      nv50_bufctx_fence(nv50->bufctx_cp, true);
   return ret;
}

static void
nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
{
   struct nv50_screen *screen = nv50->screen;
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   unsigned size = align(nv50->compprog->parm_size, 0x4);

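   /* Parameter count in units of 32-bit words, shifted by 8; the extra slot
    * leaves room for USER_PARAM(0), which nv50_launch_grid() uses to pass
    * the grid z information. */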
   BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, (1 + (size / 4)) << 8);

   if (size) {
      struct nouveau_mm_allocation *mm;
      struct nouveau_bo *bo = NULL;
      unsigned offset;

      mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
      assert(mm);

      nouveau_bo_map(bo, 0, screen->base.client);
      memcpy(bo->map + offset, input, size);

      nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
      nouveau_pushbuf_bufctx(push, nv50->bufctx);
      nouveau_pushbuf_validate(push);

      nouveau_pushbuf_space(push, 0, 0, 1);

      BEGIN_NV04(push, NV50_CP(USER_PARAM(1)), size / 4);
      nouveau_pushbuf_data(push, bo, offset, size);

      nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
      nouveau_bo_ref(NULL, &bo);
      nouveau_bufctx_reset(nv50->bufctx, 0);
   }
}

void
nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned block_size = info->block[0] * info->block[1] * info->block[2];
   struct nv50_program *cp = nv50->compprog;
   bool ret;

   ret = !nv50_state_validate_cp(nv50, ~0);
   if (ret) {
      NOUVEAU_ERR("Failed to launch grid!\n");
      return;
   }

   nv50_compute_upload_input(nv50, info->input);

   BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
   PUSH_DATA (push, cp->code_base);

   BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
   PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x14, 0x40));
   BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, cp->max_gpr);

   /* no indirect support - just read the parameters out */
   uint32_t grid[3];
   if (unlikely(info->indirect)) {
      pipe_buffer_read(pipe, info->indirect, info->indirect_offset,
                       sizeof(grid), grid);
   } else {
      memcpy(grid, info->grid, sizeof(grid));
   }

   /* grid/block setup */
   BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2);
   PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
   PUSH_DATA (push, info->block[2]);
   BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1);
   PUSH_DATA (push, 1 << 16 | block_size);
   BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_CP(GRIDDIM), 1);
   PUSH_DATA (push, grid[1] << 16 | grid[0]);
   BEGIN_NV04(push, NV50_CP(GRIDID), 1);
   PUSH_DATA (push, 1);

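   /* GRIDDIM above only holds x and y, so emulate the z dimension by
    * launching once per z slice, passing the total z count in the low bits
    * and the slice index in the high bits of USER_PARAM(0). */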
   for (int i = 0; i < grid[2]; i++) {
      BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), 1);
      PUSH_DATA (push, grid[2] | i << 16);

      /* kernel launching */
      BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
      PUSH_DATA (push, 0);
   }

   BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

   /* Binding a compute shader clobbers fragment shader state. */
   nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;

   nv50->compute_invocations += info->block[0] * info->block[1] * info->block[2] *
      grid[0] * grid[1] * grid[2];
}