1 /*
2  * Copyright © 2016 Red Hat
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <stdbool.h>
25 
26 #include "st_tgsi_lower_yuv.h"
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "util/u_debug.h"
30 
31 #include "util/bitscan.h"
32 
33 struct tgsi_yuv_transform {
34    struct tgsi_transform_context base;
35    struct tgsi_shader_info info;
36    struct tgsi_full_src_register imm[4];
37    struct {
38       struct tgsi_full_src_register src;
39       struct tgsi_full_dst_register dst;
40    } tmp[2];
41 #define A 0
42 #define B 1
43 
44    /* Maps a primary sampler (used for Y) to the U or UV sampler.  In
45     * case of 3-plane YUV format, the V plane is next sampler after U.
46     */
47    unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
48 
49    bool first_instruction_emitted;
50    unsigned free_slots;
51    unsigned lower_nv12;
52    unsigned lower_iyuv;
53 };
54 
/*
 * Downcast a generic transform context to the YUV lowering context.
 * Valid because the tgsi_transform_context is the first member of
 * struct tgsi_yuv_transform.
 */
static inline struct tgsi_yuv_transform *
tgsi_yuv_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_yuv_transform *yuv = (struct tgsi_yuv_transform *)tctx;

   return yuv;
}
60 
61 static void
reg_dst(struct tgsi_full_dst_register * dst,const struct tgsi_full_dst_register * orig_dst,unsigned wrmask)62 reg_dst(struct tgsi_full_dst_register *dst,
63         const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
64 {
65    *dst = *orig_dst;
66    dst->Register.WriteMask &= wrmask;
67    assert(dst->Register.WriteMask);
68 }
69 
70 static inline void
get_swiz(unsigned * swiz,const struct tgsi_src_register * src)71 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
72 {
73    swiz[0] = src->SwizzleX;
74    swiz[1] = src->SwizzleY;
75    swiz[2] = src->SwizzleZ;
76    swiz[3] = src->SwizzleW;
77 }
78 
79 static void
reg_src(struct tgsi_full_src_register * src,const struct tgsi_full_src_register * orig_src,unsigned sx,unsigned sy,unsigned sz,unsigned sw)80 reg_src(struct tgsi_full_src_register *src,
81         const struct tgsi_full_src_register *orig_src,
82         unsigned sx, unsigned sy, unsigned sz, unsigned sw)
83 {
84    unsigned swiz[4];
85    get_swiz(swiz, &orig_src->Register);
86    *src = *orig_src;
87    src->Register.SwizzleX = swiz[sx];
88    src->Register.SwizzleY = swiz[sy];
89    src->Register.SwizzleZ = swiz[sz];
90    src->Register.SwizzleW = swiz[sw];
91 }
92 
/* Lets SWIZ() accept '_' for a channel whose value is a don't-care;
 * it simply selects X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X

/* Expands four channel letters (X, Y, Z, W or _) into the four
 * comma-separated TGSI_SWIZZLE_* arguments expected by reg_src().
 */
#define SWIZ(c0, c1, c2, c3)                        \
   TGSI_SWIZZLE_ ## c0, TGSI_SWIZZLE_ ## c1,        \
   TGSI_SWIZZLE_ ## c2, TGSI_SWIZZLE_ ## c3
96 
97 static inline struct tgsi_full_instruction
tex_instruction(unsigned samp)98 tex_instruction(unsigned samp)
99 {
100    struct tgsi_full_instruction inst;
101 
102    inst = tgsi_default_full_instruction();
103    inst.Instruction.Opcode = TGSI_OPCODE_TEX;
104    inst.Instruction.Texture = 1;
105    inst.Texture.Texture = TGSI_TEXTURE_2D;
106    inst.Instruction.NumDstRegs = 1;
107    inst.Instruction.NumSrcRegs = 2;
108    inst.Src[1].Register.File  = TGSI_FILE_SAMPLER;
109    inst.Src[1].Register.Index = samp;
110 
111    return inst;
112 }
113 
114 static inline struct tgsi_full_instruction
mov_instruction(void)115 mov_instruction(void)
116 {
117    struct tgsi_full_instruction inst;
118 
119    inst = tgsi_default_full_instruction();
120    inst.Instruction.Opcode = TGSI_OPCODE_MOV;
121    inst.Instruction.Saturate = 0;
122    inst.Instruction.NumDstRegs = 1;
123    inst.Instruction.NumSrcRegs = 1;
124 
125    return inst;
126 }
127 
128 static inline struct tgsi_full_instruction
dp3_instruction(void)129 dp3_instruction(void)
130 {
131    struct tgsi_full_instruction inst;
132 
133    inst = tgsi_default_full_instruction();
134    inst.Instruction.Opcode = TGSI_OPCODE_DP3;
135    inst.Instruction.NumDstRegs = 1;
136    inst.Instruction.NumSrcRegs = 2;
137 
138    return inst;
139 }
140 
141 
142 
143 static void
emit_immed(struct tgsi_transform_context * tctx,int idx,float x,float y,float z,float w)144 emit_immed(struct tgsi_transform_context *tctx, int idx,
145            float x, float y, float z, float w)
146 {
147    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
148    struct tgsi_shader_info *info = &ctx->info;
149    struct tgsi_full_immediate immed;
150 
151    immed = tgsi_default_full_immediate();
152    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
153    immed.u[0].Float = x;
154    immed.u[1].Float = y;
155    immed.u[2].Float = z;
156    immed.u[3].Float = w;
157    tctx->emit_immediate(tctx, &immed);
158 
159    ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;
160    ctx->imm[idx].Register.Index = info->immediate_count + idx;
161    ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
162    ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
163    ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
164    ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
165 }
166 
167 static void
emit_samp(struct tgsi_transform_context * tctx,unsigned samp)168 emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
169 {
170    tgsi_transform_sampler_decl(tctx, samp);
171    tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D,
172                                     TGSI_RETURN_TYPE_FLOAT);
173 }
174 
175 /* Emit extra declarations we need:
176  *  + 2 TEMP to hold intermediate results
177  *  + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
178  *    lowered YUV sampler
179  *  + extra immediates for doing CSC
180  */
181 static void
emit_decls(struct tgsi_transform_context * tctx)182 emit_decls(struct tgsi_transform_context *tctx)
183 {
184    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
185    struct tgsi_shader_info *info = &ctx->info;
186    unsigned mask, tempbase, i;
187    struct tgsi_full_declaration decl;
188 
189    /*
190     * Declare immediates for CSC conversion:
191     */
192 
193    /* ITU-R BT.601 conversion */
194    emit_immed(tctx, 0, 1.164f,  0.000f,  1.596f,  0.0f);
195    emit_immed(tctx, 1, 1.164f, -0.392f, -0.813f,  0.0f);
196    emit_immed(tctx, 2, 1.164f,  2.017f,  0.000f,  0.0f);
197    emit_immed(tctx, 3, 0.0625f, 0.500f,  0.500f,  1.0f);
198 
199    /*
200     * Declare extra samplers / sampler-views:
201     */
202 
203    mask = ctx->lower_nv12 | ctx->lower_iyuv;
204    while (mask) {
205       unsigned extra, y_samp = u_bit_scan(&mask);
206 
207       extra = u_bit_scan(&ctx->free_slots);
208       ctx->sampler_map[y_samp][0] = extra;
209       emit_samp(tctx, extra);
210 
211       if (ctx->lower_iyuv & (1 << y_samp)) {
212          extra = u_bit_scan(&ctx->free_slots);
213          ctx->sampler_map[y_samp][1] = extra;
214          emit_samp(tctx, extra);
215       }
216    }
217 
218    /*
219     * Declare extra temp:
220     */
221 
222    tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
223 
224    for (i = 0; i < 2; i++) {
225       decl = tgsi_default_full_declaration();
226       decl.Declaration.File = TGSI_FILE_TEMPORARY;
227       decl.Range.First = decl.Range.Last = tempbase + i;
228       tctx->emit_declaration(tctx, &decl);
229 
230       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
231       ctx->tmp[i].src.Register.Index = tempbase + i;
232       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
233       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
234       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
235       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
236 
237       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
238       ctx->tmp[i].dst.Register.Index = tempbase + i;
239       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
240    }
241 }
242 
243 /* call with YUV in tmpA.xyz */
244 static void
yuv_to_rgb(struct tgsi_transform_context * tctx,struct tgsi_full_dst_register * dst)245 yuv_to_rgb(struct tgsi_transform_context *tctx,
246            struct tgsi_full_dst_register *dst)
247 {
248    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
249    struct tgsi_full_instruction inst;
250 
251    /*
252     * IMM[0] FLT32 { 1.164,  0.000,  1.596,  0.0 }
253     * IMM[1] FLT32 { 1.164, -0.392, -0.813,  0.0 }
254     * IMM[2] FLT32 { 1.164,  2.017,  0.000,  0.0 }
255     * IMM[3] FLT32 { 0.0625, 0.500,  0.500,  1.0 }
256     */
257 
258    /* SUB tmpA.xyz, tmpA, imm[3] */
259    inst = tgsi_default_full_instruction();
260    inst.Instruction.Opcode = TGSI_OPCODE_ADD;
261    inst.Instruction.Saturate = 0;
262    inst.Instruction.NumDstRegs = 1;
263    inst.Instruction.NumSrcRegs = 2;
264    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
265    reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
266    reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
267    inst.Src[1].Register.Negate = 1;
268    tctx->emit_instruction(tctx, &inst);
269 
270    /* DP3 dst.x, tmpA, imm[0] */
271    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
272       inst = dp3_instruction();
273       reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
274       reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
275       reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
276       tctx->emit_instruction(tctx, &inst);
277    }
278 
279    /* DP3 dst.y, tmpA, imm[1] */
280    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
281       inst = dp3_instruction();
282       reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
283       reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
284       reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
285       tctx->emit_instruction(tctx, &inst);
286    }
287 
288    /* DP3 dst.z, tmpA, imm[2] */
289    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
290       inst = dp3_instruction();
291       reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
292       reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
293       reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
294       tctx->emit_instruction(tctx, &inst);
295    }
296 
297    /* MOV dst.w, imm[0].x */
298    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
299       inst = mov_instruction();
300       reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
301       reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
302       tctx->emit_instruction(tctx, &inst);
303    }
304 }
305 
306 static void
lower_nv12(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * originst)307 lower_nv12(struct tgsi_transform_context *tctx,
308            struct tgsi_full_instruction *originst)
309 {
310    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
311    struct tgsi_full_instruction inst;
312    struct tgsi_full_src_register *coord = &originst->Src[0];
313    unsigned samp = originst->Src[1].Register.Index;
314 
315    /* sample Y:
316     *    TEX tempA.x, coord, texture[samp], 2D;
317     */
318    inst = tex_instruction(samp);
319    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
320    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
321    tctx->emit_instruction(tctx, &inst);
322 
323    /* sample UV:
324     *    TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
325     *    MOV tempA.yz, tempB._xy_
326     */
327    inst = tex_instruction(ctx->sampler_map[samp][0]);
328    reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
329    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
330    tctx->emit_instruction(tctx, &inst);
331 
332    inst = mov_instruction();
333    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
334    reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
335    tctx->emit_instruction(tctx, &inst);
336 
337    /* At this point, we have YUV in tempA.xyz, rest is common: */
338    yuv_to_rgb(tctx, &originst->Dst[0]);
339 }
340 
341 static void
lower_iyuv(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * originst)342 lower_iyuv(struct tgsi_transform_context *tctx,
343            struct tgsi_full_instruction *originst)
344 {
345    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
346    struct tgsi_full_instruction inst;
347    struct tgsi_full_src_register *coord = &originst->Src[0];
348    unsigned samp = originst->Src[1].Register.Index;
349 
350    /* sample Y:
351     *    TEX tempA.x, coord, texture[samp], 2D;
352     */
353    inst = tex_instruction(samp);
354    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
355    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
356    tctx->emit_instruction(tctx, &inst);
357 
358    /* sample U:
359     *    TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
360     *    MOV tempA.y, tempB._x__
361     */
362    inst = tex_instruction(ctx->sampler_map[samp][0]);
363    reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
364    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
365    tctx->emit_instruction(tctx, &inst);
366 
367    inst = mov_instruction();
368    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
369    reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
370    tctx->emit_instruction(tctx, &inst);
371 
372    /* sample V:
373     *    TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
374     *    MOV tempA.z, tempB.__x_
375     */
376    inst = tex_instruction(ctx->sampler_map[samp][1]);
377    reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
378    reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
379    tctx->emit_instruction(tctx, &inst);
380 
381    inst = mov_instruction();
382    reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
383    reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
384    tctx->emit_instruction(tctx, &inst);
385 
386    /* At this point, we have YUV in tempA.xyz, rest is common: */
387    yuv_to_rgb(tctx, &originst->Dst[0]);
388 }
389 
390 static void
transform_instr(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)391 transform_instr(struct tgsi_transform_context *tctx,
392                 struct tgsi_full_instruction *inst)
393 {
394    struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
395 
396    if (!ctx->first_instruction_emitted) {
397       emit_decls(tctx);
398       ctx->first_instruction_emitted = true;
399    }
400 
401    switch (inst->Instruction.Opcode) {
402    /* TODO what other tex opcode's can be used w/ external eglimgs? */
403    case TGSI_OPCODE_TEX: {
404       unsigned samp = inst->Src[1].Register.Index;
405       if (ctx->lower_nv12 & (1 << samp)) {
406          lower_nv12(tctx, inst);
407       } else if (ctx->lower_iyuv & (1 << samp)) {
408          lower_iyuv(tctx, inst);
409       } else {
410          goto skip;
411       }
412       break;
413    }
414    default:
415    skip:
416       tctx->emit_instruction(tctx, inst);
417       return;
418    }
419 }
420 
421 extern const struct tgsi_token *
st_tgsi_lower_yuv(const struct tgsi_token * tokens,unsigned free_slots,unsigned lower_nv12,unsigned lower_iyuv)422 st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
423                   unsigned lower_nv12, unsigned lower_iyuv)
424 {
425    struct tgsi_yuv_transform ctx;
426    struct tgsi_token *newtoks;
427    int newlen;
428 
429    assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
430 
431 //   tgsi_dump(tokens, 0);
432 //   debug_printf("\n");
433 
434    memset(&ctx, 0, sizeof(ctx));
435    ctx.base.transform_instruction = transform_instr;
436    ctx.free_slots = free_slots;
437    ctx.lower_nv12 = lower_nv12;
438    ctx.lower_iyuv = lower_iyuv;
439    tgsi_scan_shader(tokens, &ctx.info);
440 
441    /* TODO better job of figuring out how many extra tokens we need..
442     * this is a pain about tgsi_transform :-/
443     */
444    newlen = tgsi_num_tokens(tokens) + 300;
445    newtoks = tgsi_alloc_tokens(newlen);
446    if (!newtoks)
447       return NULL;
448 
449    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
450 
451 //   tgsi_dump(newtoks, 0);
452 //   debug_printf("\n");
453 
454    return newtoks;
455 }
456