1 /*
2  * Copyright 2012 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Ben Skeggs
23  *
24  */
25 
/* Parameter list shared by the transfer helpers in this file (both the
 * "possible" predicates and the "execute" functions stored in the method
 * table of nv30_transfer_rect()): the context, the requested filter, and
 * the source and destination rectangles.
 */
#define XFER_ARGS                                                              \
   struct nv30_context *nv30, enum nv30_transfer_filter filter,                \
   struct nv30_rect *src, struct nv30_rect *dst
29 
30 #include "util/u_math.h"
31 
32 #include "nv_object.xml.h"
33 #include "nv_m2mf.xml.h"
34 #include "nv30/nv01_2d.xml.h"
35 #include "nv30/nv30-40_3d.xml.h"
36 
37 #include "nv30/nv30_context.h"
38 #include "nv30/nv30_transfer.h"
39 
40 /* Various helper functions to transfer different types of data in a number
41  * of different ways.
42  */
43 
44 static inline bool
nv30_transfer_scaled(struct nv30_rect * src,struct nv30_rect * dst)45 nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst)
46 {
47    if (src->x1 - src->x0 != dst->x1 - dst->x0)
48       return true;
49    if (src->y1 - src->y0 != dst->y1 - dst->y0)
50       return true;
51    return false;
52 }
53 
54 static inline bool
nv30_transfer_blit(XFER_ARGS)55 nv30_transfer_blit(XFER_ARGS)
56 {
57    if (nv30->screen->eng3d->oclass < NV40_3D_CLASS)
58       return false;
59    if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1)
60       return false;
61    if (dst->w < 2 || dst->h < 2)
62       return false;
63    if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch))
64       return false;
65    if (src->cpp > 4)
66       return false;
67    return true;
68 }
69 
70 static inline struct nouveau_heap *
nv30_transfer_rect_vertprog(struct nv30_context * nv30)71 nv30_transfer_rect_vertprog(struct nv30_context *nv30)
72 {
73    struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
74    struct nouveau_heap *vp;
75 
76    vp = nv30->blit_vp;
77    if (!vp) {
78       if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) {
79          while (heap->next && heap->size < 2) {
80             struct nouveau_heap **evict = heap->next->priv;
81             nouveau_heap_free(evict);
82          }
83 
84          if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp))
85             return NULL;
86       }
87 
88       vp = nv30->blit_vp;
89       if (vp) {
90          struct nouveau_pushbuf *push = nv30->base.pushbuf;
91 
92          BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
93          PUSH_DATA (push, vp->start);
94          BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
95          PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0]; */
96          PUSH_DATA (push, 0x0040000d);
97          PUSH_DATA (push, 0x8106c083);
98          PUSH_DATA (push, 0x6041ff80);
99          BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
100          PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8]; end; */
101          PUSH_DATA (push, 0x0040080d);
102          PUSH_DATA (push, 0x8106c083);
103          PUSH_DATA (push, 0x6041ff9d);
104       }
105    }
106 
107    return vp;
108 }
109 
110 
111 static inline struct nv04_resource *
nv30_transfer_rect_fragprog(struct nv30_context * nv30)112 nv30_transfer_rect_fragprog(struct nv30_context *nv30)
113 {
114    struct nv04_resource *fp = nv04_resource(nv30->blit_fp);
115    struct pipe_context *pipe = &nv30->base.pipe;
116 
117    if (!fp) {
118       nv30->blit_fp =
119          pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STAGING, 12 * 4);
120       if (nv30->blit_fp) {
121          struct pipe_transfer *transfer;
122          u32 *map = pipe_buffer_map(pipe, nv30->blit_fp,
123                                     PIPE_MAP_WRITE, &transfer);
124          if (map) {
125             map[0] = 0x17009e00; /* texr r0, i[tex0], texture[0]; end; */
126             map[1] = 0x1c9dc801;
127             map[2] = 0x0001c800;
128             map[3] = 0x3fe1c800;
129             map[4] = 0x01401e81; /* end; */
130             map[5] = 0x1c9dc800;
131             map[6] = 0x0001c800;
132             map[7] = 0x0001c800;
133             pipe_buffer_unmap(pipe, transfer);
134          }
135 
136          fp = nv04_resource(nv30->blit_fp);
137          nouveau_buffer_migrate(&nv30->base, fp, NOUVEAU_BO_VRAM);
138       }
139    }
140 
141    return fp;
142 }
143 
144 static void
nv30_transfer_rect_blit(XFER_ARGS)145 nv30_transfer_rect_blit(XFER_ARGS)
146 {
147    struct nv04_resource *fp = nv30_transfer_rect_fragprog(nv30);
148    struct nouveau_heap *vp = nv30_transfer_rect_vertprog(nv30);
149    struct nouveau_pushbuf *push = nv30->base.pushbuf;
150    struct nouveau_pushbuf_refn refs[] = {
151       { fp->bo, fp->domain | NOUVEAU_BO_RD },
152       { src->bo, src->domain | NOUVEAU_BO_RD },
153       { dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR },
154    };
155    u32 texfmt, texswz;
156    u32 format, stride;
157 
158    if (nouveau_pushbuf_space(push, 512, 8, 0) ||
159        nouveau_pushbuf_refn (push, refs, ARRAY_SIZE(refs)))
160       return;
161 
162    /* various switches depending on cpp of the transfer */
163    switch (dst->cpp) {
164    case 4:
165       format = NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 |
166                NV30_3D_RT_FORMAT_ZETA_Z24S8;
167       texfmt = NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8;
168       texswz = 0x0000aae4;
169       break;
170    case 2:
171       format = NV30_3D_RT_FORMAT_COLOR_R5G6B5 |
172                NV30_3D_RT_FORMAT_ZETA_Z16;
173       texfmt = NV40_3D_TEX_FORMAT_FORMAT_R5G6B5;
174       texswz = 0x0000a9e4;
175       break;
176    case 1:
177       format = NV30_3D_RT_FORMAT_COLOR_B8 |
178                NV30_3D_RT_FORMAT_ZETA_Z16;
179       texfmt = NV40_3D_TEX_FORMAT_FORMAT_L8;
180       texswz = 0x0000aaff;
181       break;
182    default:
183       assert(0);
184       return;
185    }
186 
187    /* render target */
188    if (!dst->pitch) {
189       format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED;
190       format |= util_logbase2(dst->w) << 16;
191       format |= util_logbase2(dst->h) << 24;
192       stride  = 64;
193    } else {
194       format |= NV30_3D_RT_FORMAT_TYPE_LINEAR;
195       stride  = dst->pitch;
196    }
197 
198    BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2);
199    PUSH_DATA (push, dst->w << 16);
200    PUSH_DATA (push, dst->h << 16);
201    BEGIN_NV04(push, NV30_3D(RT_HORIZ), 5);
202    PUSH_DATA (push, dst->w << 16);
203    PUSH_DATA (push, dst->h << 16);
204    PUSH_DATA (push, format);
205    PUSH_DATA (push, stride);
206    PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
207    BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);
208    PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0);
209 
210    nv30->dirty |= NV30_NEW_FRAMEBUFFER;
211 
212    /* viewport state */
213    BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8);
214    PUSH_DATAf(push, 0.0);
215    PUSH_DATAf(push, 0.0);
216    PUSH_DATAf(push, 0.0);
217    PUSH_DATAf(push, 0.0);
218    PUSH_DATAf(push, 1.0);
219    PUSH_DATAf(push, 1.0);
220    PUSH_DATAf(push, 1.0);
221    PUSH_DATAf(push, 1.0);
222    BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2);
223    PUSH_DATAf(push, 0.0);
224    PUSH_DATAf(push, 1.0);
225 
226    nv30->dirty |= NV30_NEW_VIEWPORT;
227 
228    /* blend state */
229    BEGIN_NV04(push, NV30_3D(COLOR_LOGIC_OP_ENABLE), 1);
230    PUSH_DATA (push, 0);
231    BEGIN_NV04(push, NV30_3D(DITHER_ENABLE), 1);
232    PUSH_DATA (push, 0);
233    BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 1);
234    PUSH_DATA (push, 0);
235    BEGIN_NV04(push, NV30_3D(COLOR_MASK), 1);
236    PUSH_DATA (push, 0x01010101);
237 
238    nv30->dirty |= NV30_NEW_BLEND;
239 
240    /* depth-stencil-alpha state */
241    BEGIN_NV04(push, NV30_3D(DEPTH_WRITE_ENABLE), 2);
242    PUSH_DATA (push, 0);
243    PUSH_DATA (push, 0);
244    BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(0)), 1);
245    PUSH_DATA (push, 0);
246    BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(1)), 1);
247    PUSH_DATA (push, 0);
248    BEGIN_NV04(push, NV30_3D(ALPHA_FUNC_ENABLE), 1);
249    PUSH_DATA (push, 0);
250 
251    nv30->dirty |= NV30_NEW_ZSA;
252 
253    /* rasterizer state */
254    BEGIN_NV04(push, NV30_3D(SHADE_MODEL), 1);
255    PUSH_DATA (push, NV30_3D_SHADE_MODEL_FLAT);
256    BEGIN_NV04(push, NV30_3D(CULL_FACE_ENABLE), 1);
257    PUSH_DATA (push, 0);
258    BEGIN_NV04(push, NV30_3D(POLYGON_MODE_FRONT), 2);
259    PUSH_DATA (push, NV30_3D_POLYGON_MODE_FRONT_FILL);
260    PUSH_DATA (push, NV30_3D_POLYGON_MODE_BACK_FILL);
261    BEGIN_NV04(push, NV30_3D(POLYGON_OFFSET_FILL_ENABLE), 1);
262    PUSH_DATA (push, 0);
263    BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_ENABLE), 1);
264    PUSH_DATA (push, 0);
265 
266    nv30->state.scissor_off = 0;
267    nv30->dirty |= NV30_NEW_RASTERIZER;
268 
269    /* vertex program */
270    BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
271    PUSH_DATA (push, vp->start);
272    BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
273    PUSH_DATA (push, 0x00000101); /* attrib: 0, 8 */
274    PUSH_DATA (push, 0x00004000); /* result: hpos, tex0 */
275    BEGIN_NV04(push, NV30_3D(ENGINE), 1);
276    PUSH_DATA (push, 0x00000103);
277    BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1);
278    PUSH_DATA (push, 0x00000000);
279 
280    nv30->dirty |= NV30_NEW_VERTPROG;
281    nv30->dirty |= NV30_NEW_CLIP;
282 
283    /* fragment program */
284    BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1);
285    PUSH_RELOC(push, fp->bo, fp->offset, fp->domain |
286                     NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
287                     NV30_3D_FP_ACTIVE_PROGRAM_DMA0,
288                     NV30_3D_FP_ACTIVE_PROGRAM_DMA1);
289    BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1);
290    PUSH_DATA (push, 0x02000000);
291 
292    nv30->state.fragprog = NULL;
293    nv30->dirty |= NV30_NEW_FRAGPROG;
294 
295    /* texture */
296    texfmt |= 1 << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT;
297    texfmt |= NV30_3D_TEX_FORMAT_NO_BORDER;
298    texfmt |= NV40_3D_TEX_FORMAT_RECT;
299    texfmt |= 0x00008000;
300    if (src->d < 2)
301       texfmt |= NV30_3D_TEX_FORMAT_DIMS_2D;
302    else
303       texfmt |= NV30_3D_TEX_FORMAT_DIMS_3D;
304    if (src->pitch)
305       texfmt |= NV40_3D_TEX_FORMAT_LINEAR;
306 
307    BEGIN_NV04(push, NV30_3D(TEX_OFFSET(0)), 8);
308    PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0);
309    PUSH_RELOC(push, src->bo, texfmt, NOUVEAU_BO_OR,
310                     NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1);
311    PUSH_DATA (push, NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE |
312                     NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE |
313                     NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE);
314    PUSH_DATA (push, NV40_3D_TEX_ENABLE_ENABLE);
315    PUSH_DATA (push, texswz);
316    switch (filter) {
317    case BILINEAR:
318       PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_LINEAR |
319                        NV30_3D_TEX_FILTER_MAG_LINEAR | 0x00002000);
320       break;
321    default:
322       PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_NEAREST |
323                        NV30_3D_TEX_FILTER_MAG_NEAREST | 0x00002000);
324       break;
325    }
326    PUSH_DATA (push, (src->w << 16) | src->h);
327    PUSH_DATA (push, 0x00000000);
328    BEGIN_NV04(push, NV40_3D(TEX_SIZE1(0)), 1);
329    PUSH_DATA (push, 0x00100000 | src->pitch);
330    BEGIN_NV04(push, SUBC_3D(0x0b40), 1);
331    PUSH_DATA (push, src->d < 2 ? 0x00000001 : 0x00000000);
332    BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1);
333    PUSH_DATA (push, 1);
334 
335    nv30->fragprog.dirty_samplers |= 1;
336    nv30->dirty |= NV30_NEW_FRAGTEX;
337 
338    /* blit! */
339    BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2);
340    PUSH_DATA (push, (dst->x1 - dst->x0) << 16 | dst->x0);
341    PUSH_DATA (push, (dst->y1 - dst->y0) << 16 | dst->y0);
342    BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
343    PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_QUADS);
344    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
345    PUSH_DATAf(push, src->x0);
346    PUSH_DATAf(push, src->y0);
347    PUSH_DATAf(push, src->z);
348    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
349    PUSH_DATA (push, (dst->y0 << 16) | dst->x0);
350    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
351    PUSH_DATAf(push, src->x1);
352    PUSH_DATAf(push, src->y0);
353    PUSH_DATAf(push, src->z);
354    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
355    PUSH_DATA (push, (dst->y0 << 16) | dst->x1);
356    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
357    PUSH_DATAf(push, src->x1);
358    PUSH_DATAf(push, src->y1);
359    PUSH_DATAf(push, src->z);
360    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
361    PUSH_DATA (push, (dst->y1 << 16) | dst->x1);
362    BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
363    PUSH_DATAf(push, src->x0);
364    PUSH_DATAf(push, src->y1);
365    PUSH_DATAf(push, src->z);
366    BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
367    PUSH_DATA (push, (dst->y1 << 16) | dst->x0);
368    BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
369    PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
370 }
371 
372 static bool
nv30_transfer_sifm(XFER_ARGS)373 nv30_transfer_sifm(XFER_ARGS)
374 {
375    if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
376       return false;
377 
378    if (src->d > 1 || dst->d > 1)
379       return false;
380 
381    if (dst->offset & 63)
382       return false;
383 
384    if (!dst->pitch) {
385       if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
386          return false;
387    } else {
388       if (dst->domain != NOUVEAU_BO_VRAM)
389          return false;
390       if (dst->pitch & 63)
391          return false;
392    }
393 
394    return true;
395 }
396 
397 static void
nv30_transfer_rect_sifm(XFER_ARGS)398 nv30_transfer_rect_sifm(XFER_ARGS)
399 
400 {
401    struct nouveau_pushbuf *push = nv30->base.pushbuf;
402    struct nouveau_pushbuf_refn refs[] = {
403       { src->bo, src->domain | NOUVEAU_BO_RD },
404       { dst->bo, dst->domain | NOUVEAU_BO_WR },
405    };
406    struct nv04_fifo *fifo = push->channel->data;
407    unsigned si_fmt, si_arg;
408    unsigned ss_fmt;
409 
410    switch (dst->cpp) {
411    case 4: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_A8R8G8B8; break;
412    case 2: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_R5G6B5; break;
413    default:
414       ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_Y8;
415       break;
416    }
417 
418    switch (src->cpp) {
419    case 4: si_fmt = NV03_SIFM_COLOR_FORMAT_A8R8G8B8; break;
420    case 2: si_fmt = NV03_SIFM_COLOR_FORMAT_R5G6B5; break;
421    default:
422       si_fmt = NV03_SIFM_COLOR_FORMAT_AY8;
423       break;
424    }
425 
426    if (filter == NEAREST) {
427       si_arg  = NV03_SIFM_FORMAT_ORIGIN_CENTER;
428       si_arg |= NV03_SIFM_FORMAT_FILTER_POINT_SAMPLE;
429    } else {
430       si_arg  = NV03_SIFM_FORMAT_ORIGIN_CORNER;
431       si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
432    }
433 
434    if (nouveau_pushbuf_space(push, 64, 6, 0) ||
435        nouveau_pushbuf_refn (push, refs, 2))
436       return;
437 
438    if (dst->pitch) {
439       BEGIN_NV04(push, NV04_SF2D(DMA_IMAGE_SOURCE), 2);
440       PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
441       PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
442       BEGIN_NV04(push, NV04_SF2D(FORMAT), 4);
443       PUSH_DATA (push, ss_fmt);
444       PUSH_DATA (push, dst->pitch << 16 | dst->pitch);
445       PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
446       PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
447       BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
448       PUSH_DATA (push, nv30->screen->surf2d->handle);
449    } else {
450       BEGIN_NV04(push, NV04_SSWZ(DMA_IMAGE), 1);
451       PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
452       BEGIN_NV04(push, NV04_SSWZ(FORMAT), 2);
453       PUSH_DATA (push, ss_fmt | (util_logbase2(dst->w) << 16) |
454                                 (util_logbase2(dst->h) << 24));
455       PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
456       BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
457       PUSH_DATA (push, nv30->screen->swzsurf->handle);
458    }
459 
460    BEGIN_NV04(push, NV03_SIFM(DMA_IMAGE), 1);
461    PUSH_RELOC(push, src->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
462    BEGIN_NV04(push, NV03_SIFM(COLOR_FORMAT), 8);
463    PUSH_DATA (push, si_fmt);
464    PUSH_DATA (push, NV03_SIFM_OPERATION_SRCCOPY);
465    PUSH_DATA (push, (           dst->y0  << 16) |            dst->x0);
466    PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
467    PUSH_DATA (push, (           dst->y0  << 16) |            dst->x0);
468    PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
469    PUSH_DATA (push, ((src->x1 - src->x0) << 20) / (dst->x1 - dst->x0));
470    PUSH_DATA (push, ((src->y1 - src->y0) << 20) / (dst->y1 - dst->y0));
471    BEGIN_NV04(push, NV03_SIFM(SIZE), 4);
472    PUSH_DATA (push, align(src->h, 2) << 16 | align(src->w, 2));
473    PUSH_DATA (push, src->pitch | si_arg);
474    PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0);
475    PUSH_DATA (push, (src->y0 << 20) | src->x0 << 4);
476 }
477 
478 /* The NOP+OFFSET_OUT stuff after each M2MF transfer *is* actually required
479  * to prevent some odd things from happening, easily reproducible by
480  * attempting to do conditional rendering that has a M2MF transfer done
481  * some time before it.  0x1e98 will fail with a DMA_W_PROTECTION (assuming
482  * that name is still accurate on nv4x) error.
483  */
484 
485 static bool
nv30_transfer_m2mf(XFER_ARGS)486 nv30_transfer_m2mf(XFER_ARGS)
487 {
488    if (!src->pitch || !dst->pitch)
489       return false;
490    if (nv30_transfer_scaled(src, dst))
491       return false;
492    return true;
493 }
494 
495 static void
nv30_transfer_rect_m2mf(XFER_ARGS)496 nv30_transfer_rect_m2mf(XFER_ARGS)
497 {
498    struct nouveau_pushbuf *push = nv30->base.pushbuf;
499    struct nouveau_pushbuf_refn refs[] = {
500       { src->bo, src->domain | NOUVEAU_BO_RD },
501       { dst->bo, dst->domain | NOUVEAU_BO_WR },
502    };
503    struct nv04_fifo *fifo = push->channel->data;
504    unsigned src_offset = src->offset;
505    unsigned dst_offset = dst->offset;
506    unsigned w = dst->x1 - dst->x0;
507    unsigned h = dst->y1 - dst->y0;
508 
509    src_offset += (src->y0 * src->pitch) + (src->x0 * src->cpp);
510    dst_offset += (dst->y0 * dst->pitch) + (dst->x0 * dst->cpp);
511 
512    BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
513    PUSH_DATA (push, (src->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
514    PUSH_DATA (push, (dst->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
515 
516    while (h) {
517       unsigned lines = (h > 2047) ? 2047 : h;
518 
519       if (nouveau_pushbuf_space(push, 32, 2, 0) ||
520           nouveau_pushbuf_refn (push, refs, 2))
521          return;
522 
523       BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
524       PUSH_RELOC(push, src->bo, src_offset, NOUVEAU_BO_LOW, 0, 0);
525       PUSH_RELOC(push, dst->bo, dst_offset, NOUVEAU_BO_LOW, 0, 0);
526       PUSH_DATA (push, src->pitch);
527       PUSH_DATA (push, dst->pitch);
528       PUSH_DATA (push, w * src->cpp);
529       PUSH_DATA (push, lines);
530       PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
531                        NV03_M2MF_FORMAT_OUTPUT_INC_1);
532       PUSH_DATA (push, 0x00000000);
533       BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
534       PUSH_DATA (push, 0x00000000);
535       BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
536       PUSH_DATA (push, 0x00000000);
537 
538       h -= lines;
539       src_offset += src->pitch * lines;
540       dst_offset += dst->pitch * lines;
541    }
542 }
543 
544 static bool
nv30_transfer_cpu(XFER_ARGS)545 nv30_transfer_cpu(XFER_ARGS)
546 {
547    if (nv30_transfer_scaled(src, dst))
548       return false;
549    return true;
550 }
551 
552 static char *
linear_ptr(struct nv30_rect * rect,char * base,int x,int y,int z)553 linear_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
554 {
555    return base + (y * rect->pitch) + (x * rect->cpp);
556 }
557 
/* Spreads the low bits of v so that input bit i lands on output bit 2*i
 * (a zero bit is inserted between neighbouring bits), then shifts the
 * result left by s.  With s = 0 / s = 1 this yields the even / odd bit
 * lanes of an interleaved 2D index.
 */
static inline unsigned
swizzle2d(unsigned v, unsigned s)
{
   static const struct {
      unsigned shift;
      unsigned mask;
   } stages[] = {
      { 8, 0x00ff00ff },
      { 4, 0x0f0f0f0f },
      { 2, 0x33333333 },
      { 1, 0x55555555 },
   };
   unsigned i;

   /* each stage halves the width of the contiguous bit groups */
   for (i = 0; i < sizeof(stages) / sizeof(stages[0]); i++)
      v = (v | (v << stages[i].shift)) & stages[i].mask;

   return v << s;
}
567 
568 static char *
swizzle2d_ptr(struct nv30_rect * rect,char * base,int x,int y,int z)569 swizzle2d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
570 {
571    unsigned k = util_logbase2(MIN2(rect->w, rect->h));
572    unsigned km = (1 << k) - 1;
573    unsigned nx = rect->w >> k;
574    unsigned tx = x >> k;
575    unsigned ty = y >> k;
576    unsigned m;
577 
578    m  = swizzle2d(x & km, 0);
579    m |= swizzle2d(y & km, 1);
580    m += ((ty * nx) + tx) << k << k;
581 
582    return base + (m * rect->cpp);
583 }
584 
585 static char *
swizzle3d_ptr(struct nv30_rect * rect,char * base,int x,int y,int z)586 swizzle3d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
587 {
588    unsigned w = rect->w >> 1;
589    unsigned h = rect->h >> 1;
590    unsigned d = rect->d >> 1;
591    unsigned i = 0, o;
592    unsigned v = 0;
593 
594    do {
595       o = i;
596       if (w) {
597          v |= (x & 1) << i++;
598          x >>= 1;
599          w >>= 1;
600       }
601       if (h) {
602          v |= (y & 1) << i++;
603          y >>= 1;
604          h >>= 1;
605       }
606       if (d) {
607          v |= (z & 1) << i++;
608          z >>= 1;
609          d >>= 1;
610       }
611    } while(o != i);
612 
613    return base + (v * rect->cpp);
614 }
615 
616 typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int);
617 
618 static inline get_ptr_t
get_ptr(struct nv30_rect * rect)619 get_ptr(struct nv30_rect *rect)
620 {
621    if (rect->pitch)
622       return linear_ptr;
623 
624    if (rect->d <= 1)
625       return swizzle2d_ptr;
626 
627    return swizzle3d_ptr;
628 }
629 
630 static void
nv30_transfer_rect_cpu(XFER_ARGS)631 nv30_transfer_rect_cpu(XFER_ARGS)
632 {
633    get_ptr_t sp = get_ptr(src);
634    get_ptr_t dp = get_ptr(dst);
635    char *srcmap, *dstmap;
636    int x, y;
637 
638    nouveau_bo_map(src->bo, NOUVEAU_BO_RD, nv30->base.client);
639    nouveau_bo_map(dst->bo, NOUVEAU_BO_WR, nv30->base.client);
640    srcmap = src->bo->map + src->offset;
641    dstmap = dst->bo->map + dst->offset;
642 
643    for (y = 0; y < (dst->y1 - dst->y0); y++) {
644       for (x = 0; x < (dst->x1 - dst->x0); x++) {
645          memcpy(dp(dst, dstmap, dst->x0 + x, dst->y0 + y, dst->z),
646                 sp(src, srcmap, src->x0 + x, src->y0 + y, src->z), dst->cpp);
647       }
648    }
649 }
650 
651 void
nv30_transfer_rect(struct nv30_context * nv30,enum nv30_transfer_filter filter,struct nv30_rect * src,struct nv30_rect * dst)652 nv30_transfer_rect(struct nv30_context *nv30, enum nv30_transfer_filter filter,
653                    struct nv30_rect *src, struct nv30_rect *dst)
654 {
655    static const struct {
656       char *name;
657       bool (*possible)(XFER_ARGS);
658       void (*execute)(XFER_ARGS);
659    } *method, methods[] = {
660       { "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf },
661       { "sifm", nv30_transfer_sifm, nv30_transfer_rect_sifm },
662       { "blit", nv30_transfer_blit, nv30_transfer_rect_blit },
663       { "rect", nv30_transfer_cpu, nv30_transfer_rect_cpu },
664       {}
665    };
666 
667    for (method = methods; method->possible; method++) {
668       if (method->possible(nv30, filter, src, dst)) {
669          method->execute(nv30, filter, src, dst);
670          return;
671       }
672    }
673 
674    assert(0);
675 }
676 
/* Placeholder for pushing inline data into a buffer object: not
 * implemented.  All arguments are ignored; a diagnostic is written to
 * stderr and nothing is transferred.
 */
void
nv30_transfer_push_data(struct nouveau_context *nv,
                        struct nouveau_bo *bo, unsigned offset, unsigned domain,
                        unsigned size, void *data)
{
   /* open question: use ifc, or scratch + copy_data? */
   fputs("nv30: push_data not implemented\n", stderr);
}
685 
686 void
nv30_transfer_copy_data(struct nouveau_context * nv,struct nouveau_bo * dst,unsigned d_off,unsigned d_dom,struct nouveau_bo * src,unsigned s_off,unsigned s_dom,unsigned size)687 nv30_transfer_copy_data(struct nouveau_context *nv,
688                         struct nouveau_bo *dst, unsigned d_off, unsigned d_dom,
689                         struct nouveau_bo *src, unsigned s_off, unsigned s_dom,
690                         unsigned size)
691 {
692    struct nv04_fifo *fifo = nv->screen->channel->data;
693    struct nouveau_pushbuf_refn refs[] = {
694       { src, s_dom | NOUVEAU_BO_RD },
695       { dst, d_dom | NOUVEAU_BO_WR },
696    };
697    struct nouveau_pushbuf *push = nv->pushbuf;
698    unsigned pages, lines;
699 
700    pages = size >> 12;
701    size -= (pages << 12);
702 
703    BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
704    PUSH_DATA (push, (s_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
705    PUSH_DATA (push, (d_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
706 
707    while (pages) {
708       lines  = (pages > 2047) ? 2047 : pages;
709       pages -= lines;
710 
711       if (nouveau_pushbuf_space(push, 32, 2, 0) ||
712           nouveau_pushbuf_refn (push, refs, 2))
713          return;
714 
715       BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
716       PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
717       PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
718       PUSH_DATA (push, 4096);
719       PUSH_DATA (push, 4096);
720       PUSH_DATA (push, 4096);
721       PUSH_DATA (push, lines);
722       PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
723                        NV03_M2MF_FORMAT_OUTPUT_INC_1);
724       PUSH_DATA (push, 0x00000000);
725       BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
726       PUSH_DATA (push, 0x00000000);
727       BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
728       PUSH_DATA (push, 0x00000000);
729 
730       s_off += (lines << 12);
731       d_off += (lines << 12);
732    }
733 
734    if (size) {
735       if (nouveau_pushbuf_space(push, 32, 2, 0) ||
736           nouveau_pushbuf_refn (push, refs, 2))
737          return;
738 
739       BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
740       PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
741       PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
742       PUSH_DATA (push, size);
743       PUSH_DATA (push, size);
744       PUSH_DATA (push, size);
745       PUSH_DATA (push, 1);
746       PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
747                        NV03_M2MF_FORMAT_OUTPUT_INC_1);
748       PUSH_DATA (push, 0x00000000);
749       BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
750       PUSH_DATA (push, 0x00000000);
751       BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
752       PUSH_DATA (push, 0x00000000);
753    }
754 }
755