1 /**************************************************************************
2  *
3  * Copyright 2019 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  * Authors: James Zhu <james.zhu@amd.com>
27  *
28  **************************************************************************/
29 
30 #include <assert.h>
31 
32 #include "tgsi/tgsi_text.h"
33 #include "vl_compositor_cs.h"
34 
/*
 * Per-layer parameters filled in by draw_layers() and copied by
 * set_viewport() into the shader constant buffer, in this field order,
 * right after the CSC matrix and two further floats.
 */
struct cs_viewport {
   float scale_x;        /* written as float; derived from the viewport x scale and sampler 0 width */
   float scale_y;        /* written as float; derived from the viewport y scale and sampler 0 height */
   struct u_rect area;   /* drawn rectangle (x0, y0, x1, y1), written as four ints */
   int translate_x;      /* viewport x translation truncated to int */
   int translate_y;      /* viewport y translation truncated to int */
   float sampler0_w;     /* width0 of sampler view 0's texture */
   float sampler0_h;     /* height0 of sampler view 0's texture */
};
44 
/*
 * TGSI compute shader text. Each thread handles one destination pixel
 * (8x8 threads per block). For pixels inside the rectangle in CONST[4] it
 * fetches one channel from each of the three RECT sampler views, applies
 * the DP4 color conversion with CONST[0..2] and stores the result to
 * IMAGE[0].
 */
const char *compute_shader_video_buffer =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..6]\n"
      "DCL SVIEW[0..2], RECT, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..7]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 0.0, 0.0, 0.0}\n"

      /* Destination pixel = BLOCK_ID * 8 + THREAD_ID */
      "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"

      /* Drawn area check: CONST[4].xy <= pixel < CONST[4].zw */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1].xxxx\n"
         /* Translate: subtract CONST[5].xy and convert to float */
         "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"
         "U2F TEMP[2].xy, TEMP[2].xyyy\n"
         /* TEMP[3] = coordinates for samplers 1 and 2, scaled by CONST[6].xy */
         "MUL TEMP[3].xy, TEMP[2].xyyy, CONST[6].xyyy\n"

         /* Scale: divide by CONST[3].zw */
         "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n"
         "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n"

         /* Fetch texels: one channel per sampler */
         "TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n"
         "TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n"
         "TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n"

         "MOV TEMP[4].w, IMM[1].xxxx\n"

         /* Color Space Conversion */
         "DP4 TEMP[7].x, CONST[0], TEMP[4]\n"
         "DP4 TEMP[7].y, CONST[1], TEMP[4]\n"
         "DP4 TEMP[7].z, CONST[2], TEMP[4]\n"

         /* Output w = max(z > CONST[3].y, z <= CONST[3].x), z being the
          * value fetched from sampler 2 */
         "MOV TEMP[5].w, TEMP[4].zzzz\n"
         "SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n"
         "SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n"

         "MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n"

         "STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n"
      "ENDIF\n"

      "END\n";
105 
/*
 * TGSI compute shader text. Each thread handles one destination pixel
 * (8x8 threads per block). For pixels inside the rectangle in CONST[4] it
 * fetches three channels from array layer 0 and from array layer 1 of the
 * 2D_ARRAY sampler views, blends the two fetches with LRP, applies the DP4
 * color conversion with CONST[0..2] and stores the result to IMAGE[0].
 */
const char *compute_shader_weave =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..15]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
      "IMM[2] UINT32 { 1, 2, 4, 0}\n"
      "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"

      /* Destination pixel = BLOCK_ID * 8 + THREAD_ID */
      "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"

      /* Drawn area check: CONST[4].xy <= pixel < CONST[4].zw */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1].xxxx\n"
         "MOV TEMP[2].xy, TEMP[0].xyyy\n"
         /* Translate: subtract CONST[5].xy */
         "UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n"

         /* Top Y (TEMP[2]): y is halved */
         "U2F TEMP[2].xy, TEMP[2].xyyy\n"
         "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
         /* Down Y (TEMP[12]) */
         "MOV TEMP[12].xy, TEMP[2].xyyy\n"

         /* Top UV (TEMP[3]): x and y are halved again */
         "MOV TEMP[3].xy, TEMP[2].xyyy\n"
         "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
         /* Down UV (TEMP[13]) */
         "MOV TEMP[13].xy, TEMP[3].xyyy\n"

         /* Texture offset */
         "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
         "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
         "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"

         "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
         "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
         "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
         "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"

         /* Scale: divide by CONST[3].zw */
         "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"
         "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[3].zwzw\n"
         "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwzw\n"
         "DIV TEMP[13].xy, TEMP[13].xyyy, CONST[3].zwzw\n"

         /* Weave offset: +0.25 for the top coordinates, -0.25 for the down ones */
         "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
         "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"

         /* Texture layer: TEMP[14] = 2 * |y - round(y)|, used below as the
          * LRP weight between the two layer fetches */
         "MOV TEMP[14].x, TEMP[2].yyyy\n"
         "MOV TEMP[14].yz, TEMP[3].yyyy\n"
         "ROUND TEMP[15].xyz, TEMP[14].xyzz\n"
         "ADD TEMP[14].xyz, TEMP[14].xyzz, -TEMP[15].xyzz\n"
         "MOV TEMP[14].xyz, |TEMP[14].xyzz|\n"
         "MUL TEMP[14].xyz, TEMP[14].xyzz, IMM[1].yyyy\n"

         /* Normalize: divide by CONST[5].zw (half of it for the UV coordinates) */
         "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[5].zwzw\n"
         "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[5].zwzw\n"
         "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
         "DIV TEMP[3].xy, TEMP[3].xyyy, TEMP[15].xyxy\n"
         "DIV TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xyxy\n"

         /* Fetch texels: top coordinates use array layer 0.0 */
         "MOV TEMP[2].z, IMM[1].wwww\n"
         "MOV TEMP[3].z, IMM[1].wwww\n"
         "TEX_LZ TEMP[10].x, TEMP[2].xyzz, SAMP[0], 2D_ARRAY\n"
         "TEX_LZ TEMP[10].y, TEMP[3].xyzz, SAMP[1], 2D_ARRAY\n"
         "TEX_LZ TEMP[10].z, TEMP[3].xyzz, SAMP[2], 2D_ARRAY\n"

         /* Down coordinates use array layer 1.0 */
         "MOV TEMP[12].z, IMM[1].xxxx\n"
         "MOV TEMP[13].z, IMM[1].xxxx\n"
         "TEX_LZ TEMP[11].x, TEMP[12].xyzz, SAMP[0], 2D_ARRAY\n"
         "TEX_LZ TEMP[11].y, TEMP[13].xyzz, SAMP[1], 2D_ARRAY\n"
         "TEX_LZ TEMP[11].z, TEMP[13].xyzz, SAMP[2], 2D_ARRAY\n"

         "LRP TEMP[6].xyz, TEMP[14].xyzz, TEMP[10].xyzz, TEMP[11].xyzz\n"
         "MOV TEMP[6].w, IMM[1].xxxx\n"

         /* Color Space Conversion */
         "DP4 TEMP[9].x, CONST[0], TEMP[6]\n"
         "DP4 TEMP[9].y, CONST[1], TEMP[6]\n"
         "DP4 TEMP[9].z, CONST[2], TEMP[6]\n"

         /* Output w = max(z > CONST[3].y, z <= CONST[3].x), z being the
          * blended third channel */
         "MOV TEMP[7].w, TEMP[6].zzzz\n"
         "SLE TEMP[8].w, TEMP[7].wwww, CONST[3].xxxx\n"
         "SGT TEMP[7].w, TEMP[7].wwww, CONST[3].yyyy\n"

         "MAX TEMP[9].w, TEMP[7].wwww, TEMP[8].wwww\n"

         "STORE IMAGE[0], TEMP[0].xyyy, TEMP[9], 2D\n"
      "ENDIF\n"

      "END\n";
222 
/*
 * TGSI compute shader text. Each thread handles one destination pixel
 * (8x8 threads per block). For pixels inside the rectangle in CONST[4] it
 * fetches all four channels from the single RECT sampler view and stores
 * them unmodified to IMAGE[0].
 */
const char *compute_shader_rgba =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0], RECT, FLOAT\n"
      "DCL SAMP[0]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..3]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

      /* Destination pixel = BLOCK_ID * 8 + THREAD_ID */
      "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"

      /* Drawn area check: CONST[4].xy <= pixel < CONST[4].zw */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1].xxxx\n"
         /* Translate: subtract CONST[5].xy and convert to float */
         "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"
         "U2F TEMP[2].xy, TEMP[2].xyyy\n"

         /* Scale: divide by CONST[3].zw */
         "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"

         /* Fetch texels */
         "TEX_LZ TEMP[3], TEMP[2].xyyy, SAMP[0], RECT\n"

         "STORE IMAGE[0], TEMP[0].xyyy, TEMP[3], 2D\n"
      "ENDIF\n"

      "END\n";
266 
/*
 * TGSI compute shader text. Each thread handles one destination pixel
 * (8x8 threads per block). For pixels inside the rectangle in CONST[4] it
 * fetches three channels from array layer 0 and from array layer 1 of the
 * 2D_ARRAY sampler views, blends the two fetches with LRP and stores the
 * blended value (w forced to 1.0) to IMAGE[0]. No color conversion is
 * applied.
 */
static const char *compute_shader_yuv_weave_y =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..15]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
      "IMM[2] UINT32 { 1, 2, 4, 0}\n"
      "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"

      /* Destination pixel = BLOCK_ID * 8 + THREAD_ID */
      "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

      /* Drawn area check: CONST[4].xy <= pixel < CONST[4].zw */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1]\n"
         "MOV TEMP[2], TEMP[0]\n"
         /* Translate: subtract CONST[5].xy */
         "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"

         /* Top Y (TEMP[2]): y is halved */
         "U2F TEMP[2], TEMP[2]\n"
         "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
         /* Down Y (TEMP[12]) */
         "MOV TEMP[12], TEMP[2]\n"

         /* Top UV (TEMP[3]): x and y are halved again */
         "MOV TEMP[3], TEMP[2]\n"
         "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
         /* Down UV (TEMP[13]) */
         "MOV TEMP[13], TEMP[3]\n"

         /* Texture offset */
         "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
         "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
         "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"

         "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
         "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
         "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
         "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"

         /* Scale: divide by CONST[3].zw */
         "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
         "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
         "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
         "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"

         /* Weave offset: +0.25 for the top coordinates, -0.25 for the down ones */
         "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
         "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"

         /* Texture layer: TEMP[14] = 2 * |y - round(y)|, used below as the
          * LRP weight between the two layer fetches */
         "MOV TEMP[14].x, TEMP[2].yyyy\n"
         "MOV TEMP[14].yz, TEMP[3].yyyy\n"
         "ROUND TEMP[15], TEMP[14]\n"
         "ADD TEMP[14], TEMP[14], -TEMP[15]\n"
         "MOV TEMP[14], |TEMP[14]|\n"
         "MUL TEMP[14], TEMP[14], IMM[1].yyyy\n"

         /* Normalize: divide by CONST[5].zw (half of it for the UV coordinates) */
         "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"
         "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"
         "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
         "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"
         "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"

         /* Fetch texels: top coordinates use array layer 0.0 */
         "MOV TEMP[2].z, IMM[1].wwww\n"
         "MOV TEMP[3].z, IMM[1].wwww\n"
         "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"
         "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"
         "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"

         /* Down coordinates use array layer 1.0 */
         "MOV TEMP[12].z, IMM[1].xxxx\n"
         "MOV TEMP[13].z, IMM[1].xxxx\n"
         "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"
         "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"
         "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"

         "LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n"
         "MOV TEMP[6].w, IMM[1].xxxx\n"

         "STORE IMAGE[0], TEMP[0], TEMP[6], 2D\n"
      "ENDIF\n"

      "END\n";
372 
/*
 * TGSI compute shader text. Performs the same coordinate setup, two-layer
 * fetch and LRP blend as compute_shader_yuv_weave_y, but before the store
 * it moves the blended .yz channels (fetched through samplers 1 and 2)
 * into .xy of the value written to IMAGE[0].
 */
static const char *compute_shader_yuv_weave_uv =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..15]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
      "IMM[2] UINT32 { 1, 2, 4, 0}\n"
      "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"

      /* Destination pixel = BLOCK_ID * 8 + THREAD_ID */
      "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

      /* Drawn area check: CONST[4].xy <= pixel < CONST[4].zw */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1]\n"
         "MOV TEMP[2], TEMP[0]\n"
         /* Translate: subtract CONST[5].xy */
         "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n"

         /* Top Y (TEMP[2]): y is halved */
         "U2F TEMP[2], TEMP[2]\n"
         "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
         /* Down Y (TEMP[12]) */
         "MOV TEMP[12], TEMP[2]\n"

         /* Top UV (TEMP[3]): x and y are halved again */
         "MOV TEMP[3], TEMP[2]\n"
         "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
         /* Down UV (TEMP[13]) */
         "MOV TEMP[13], TEMP[3]\n"

         /* Texture offset */
         "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
         "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
         "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"

         "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
         "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
         "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
         "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"

         /* Scale: divide by CONST[3].zw */
         "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n"
         "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n"
         "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n"
         "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n"

         /* Weave offset: +0.25 for the top coordinates, -0.25 for the down ones */
         "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
         "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"

         /* Texture layer: TEMP[14] = 2 * |y - round(y)|, used below as the
          * LRP weight between the two layer fetches */
         "MOV TEMP[14].x, TEMP[2].yyyy\n"
         "MOV TEMP[14].yz, TEMP[3].yyyy\n"
         "ROUND TEMP[15], TEMP[14]\n"
         "ADD TEMP[14], TEMP[14], -TEMP[15]\n"
         "MOV TEMP[14], |TEMP[14]|\n"
         "MUL TEMP[14], TEMP[14], IMM[1].yyyy\n"

         /* Normalize: divide by CONST[5].zw (half of it for the UV coordinates) */
         "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n"
         "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n"
         "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
         "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n"
         "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n"

         /* Fetch texels: top coordinates use array layer 0.0 */
         "MOV TEMP[2].z, IMM[1].wwww\n"
         "MOV TEMP[3].z, IMM[1].wwww\n"
         "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n"
         "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n"
         "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n"

         /* Down coordinates use array layer 1.0 */
         "MOV TEMP[12].z, IMM[1].xxxx\n"
         "MOV TEMP[13].z, IMM[1].xxxx\n"
         "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n"
         "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n"
         "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n"

         "LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n"
         "MOV TEMP[6].w, IMM[1].xxxx\n"

         /* Pack the sampler 1 / sampler 2 channels into .xy of the output */
         "MOV TEMP[7].xy, TEMP[6].yzww\n"

         "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n"
      "ENDIF\n"

      "END\n";
480 
/*
 * TGSI compute shader text. Each thread handles one destination pixel
 * (8x8 threads per block). For pixels inside the rectangle in CONST[4] it
 * fetches one channel from each of the three RECT sampler views and stores
 * them (w forced to 1.0) to IMAGE[0]. No color conversion is applied.
 */
static const char *compute_shader_yuv_bob_y =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0..2], RECT, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..4]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

      /* Destination pixel = BLOCK_ID * 8 + THREAD_ID */
      "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

      /* Drawn area check: CONST[4].xy <= pixel < CONST[4].zw */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1]\n"
         /* Translate: subtract CONST[5].xy; TEMP[3] is the half-resolution
          * coordinate used for samplers 1 and 2 */
         "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
         "U2F TEMP[2], TEMP[2]\n"
         "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"

         /* Scale: divide by CONST[3].zw, then divide y by a further 2.0 */
         "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
         "DIV TEMP[2], TEMP[2], IMM[1].xyxy\n"
         "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
         "DIV TEMP[3], TEMP[3], IMM[1].xyxy\n"

         /* Fetch texels: one channel per sampler */
         "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
         "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
         "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"

         "MOV TEMP[4].w, IMM[1].xxxx\n"

         "STORE IMAGE[0], TEMP[0], TEMP[4], 2D\n"
      "ENDIF\n"

      "END\n";
532 
/*
 * TGSI compute shader text. Performs the same coordinate setup and fetches
 * as compute_shader_yuv_bob_y, but before the store it moves the .yz
 * channels (fetched through samplers 1 and 2) into .xy of the value
 * written to IMAGE[0].
 */
static const char *compute_shader_yuv_bob_uv =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0..2], RECT, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..5]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

      /* Destination pixel = BLOCK_ID * 8 + THREAD_ID */
      "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n"

      /* Drawn area check: CONST[4].xy <= pixel < CONST[4].zw */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1]\n"
         /* Translate: subtract CONST[5].xy; TEMP[3] is the half-resolution
          * coordinate used for samplers 1 and 2 */
         "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n"
         "U2F TEMP[2], TEMP[2]\n"
         "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n"

         /* Scale: divide by CONST[3].zw, then divide y by a further 2.0 */
         "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n"
         "DIV TEMP[2], TEMP[2], IMM[1].xyxy\n"
         "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n"
         "DIV TEMP[3], TEMP[3], IMM[1].xyxy\n"

         /* Fetch texels: one channel per sampler */
         "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n"
         "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n"
         "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n"

         "MOV TEMP[4].w, IMM[1].xxxx\n"

         /* Pack the sampler 1 / sampler 2 channels into .xy of the output */
         "MOV TEMP[5].xy, TEMP[4].yzww\n"

         "STORE IMAGE[0], TEMP[0], TEMP[5], 2D\n"
      "ENDIF\n"

      "END\n";
586 
587 static void
cs_launch(struct vl_compositor * c,void * cs,const struct u_rect * draw_area)588 cs_launch(struct vl_compositor *c,
589           void                 *cs,
590           const struct u_rect  *draw_area)
591 {
592    struct pipe_context *ctx = c->pipe;
593 
594    /* Bind the image */
595    struct pipe_image_view image = {0};
596    image.resource = c->fb_state.cbufs[0]->texture;
597    image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
598    image.format = c->fb_state.cbufs[0]->texture->format;
599 
600    ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, &image);
601 
602    /* Bind compute shader */
603    ctx->bind_compute_state(ctx, cs);
604 
605    /* Dispatch compute */
606    struct pipe_grid_info info = {0};
607    info.block[0] = 8;
608    info.block[1] = 8;
609    info.block[2] = 1;
610    info.grid[0] = DIV_ROUND_UP(draw_area->x1, info.block[0]);
611    info.grid[1] = DIV_ROUND_UP(draw_area->y1, info.block[1]);
612    info.grid[2] = 1;
613 
614    ctx->launch_grid(ctx, &info);
615 
616    /* Make the result visible to all clients. */
617    ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);
618 
619 }
620 
621 static inline struct u_rect
calc_drawn_area(struct vl_compositor_state * s,struct vl_compositor_layer * layer)622 calc_drawn_area(struct vl_compositor_state *s,
623                 struct vl_compositor_layer *layer)
624 {
625    struct vertex2f tl, br;
626    struct u_rect result;
627 
628    assert(s && layer);
629 
630    tl = layer->dst.tl;
631    br = layer->dst.br;
632 
633    /* Scale */
634    result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
635    result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
636    result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
637    result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
638 
639    /* Clip */
640    result.x0 = MAX2(result.x0, s->scissor.minx);
641    result.y0 = MAX2(result.y0, s->scissor.miny);
642    result.x1 = MIN2(result.x1, s->scissor.maxx);
643    result.y1 = MIN2(result.y1, s->scissor.maxy);
644    return result;
645 }
646 
647 static bool
set_viewport(struct vl_compositor_state * s,struct cs_viewport * drawn,struct pipe_sampler_view ** samplers)648 set_viewport(struct vl_compositor_state *s,
649              struct cs_viewport         *drawn,
650              struct pipe_sampler_view **samplers)
651 {
652    struct pipe_transfer *buf_transfer;
653 
654    assert(s && drawn);
655 
656    void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
657                                PIPE_MAP_READ | PIPE_MAP_WRITE,
658                                &buf_transfer);
659 
660    if (!ptr)
661      return false;
662 
663    float *ptr_float = (float *)ptr;
664    ptr_float += sizeof(vl_csc_matrix)/sizeof(float) + 2;
665    *ptr_float++ = drawn->scale_x;
666    *ptr_float++ = drawn->scale_y;
667 
668    int *ptr_int = (int *)ptr_float;
669    *ptr_int++ = drawn->area.x0;
670    *ptr_int++ = drawn->area.y0;
671    *ptr_int++ = drawn->area.x1;
672    *ptr_int++ = drawn->area.y1;
673    *ptr_int++ = drawn->translate_x;
674    *ptr_int++ = drawn->translate_y;
675 
676    ptr_float = (float *)ptr_int;
677    *ptr_float++ = drawn->sampler0_w;
678    *ptr_float++ = drawn->sampler0_h;
679 
680    /* compute_shader_video_buffer uses pixel coordinates based on the
681     * Y sampler dimensions. If U/V are using separate planes and are
682     * subsampled, we need to scale the coordinates */
683    if (samplers[1]) {
684       float h_ratio = samplers[1]->texture->width0 /
685                      (float) samplers[0]->texture->width0;
686       *ptr_float++ = h_ratio;
687       float v_ratio = samplers[1]->texture->height0 /
688                      (float) samplers[0]->texture->height0;
689       *ptr_float++ = v_ratio;
690    }
691    pipe_buffer_unmap(s->pipe, buf_transfer);
692 
693    return true;
694 }
695 
696 static void
draw_layers(struct vl_compositor * c,struct vl_compositor_state * s,struct u_rect * dirty)697 draw_layers(struct vl_compositor       *c,
698             struct vl_compositor_state *s,
699             struct u_rect              *dirty)
700 {
701    unsigned i;
702 
703    assert(c);
704 
705    for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
706       if (s->used_layers & (1 << i)) {
707          struct vl_compositor_layer *layer = &s->layers[i];
708          struct pipe_sampler_view **samplers = &layer->sampler_views[0];
709          unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
710          struct cs_viewport drawn;
711 
712          drawn.area = calc_drawn_area(s, layer);
713          drawn.scale_x = layer->viewport.scale[0] /
714                   (float)layer->sampler_views[0]->texture->width0 *
715                   (layer->src.br.x - layer->src.tl.x);
716          drawn.scale_y = layer->viewport.scale[1] /
717                   ((float)layer->sampler_views[0]->texture->height0 *
718                    (s->interlaced ? 2.0 : 1.0) *
719                    (layer->src.br.y - layer->src.tl.y));
720 
721          drawn.translate_x = (int)layer->viewport.translate[0];
722          drawn.translate_y = (int)layer->viewport.translate[1];
723          drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;
724          drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;
725          set_viewport(s, &drawn, samplers);
726 
727          c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
728                         num_sampler_views, layer->samplers);
729          c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
730                         num_sampler_views, 0, false, samplers);
731 
732          cs_launch(c, layer->cs, &(drawn.area));
733 
734          /* Unbind. */
735          c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL);
736          c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, false, NULL);
737          c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_FRAGMENT, 0, 0,
738                         num_sampler_views, false, NULL);
739          c->pipe->bind_compute_state(c->pipe, NULL);
740          c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
741                         num_sampler_views, NULL);
742 
743          if (dirty) {
744             struct u_rect drawn = calc_drawn_area(s, layer);
745             dirty->x0 = MIN2(drawn.x0, dirty->x0);
746             dirty->y0 = MIN2(drawn.y0, dirty->y0);
747             dirty->x1 = MAX2(drawn.x1, dirty->x1);
748             dirty->y1 = MAX2(drawn.y1, dirty->y1);
749          }
750       }
751    }
752 }
753 
754 void *
vl_compositor_cs_create_shader(struct vl_compositor * c,const char * compute_shader_text)755 vl_compositor_cs_create_shader(struct vl_compositor *c,
756                                const char           *compute_shader_text)
757 {
758    assert(c && compute_shader_text);
759 
760    struct tgsi_token tokens[1024];
761    if (!tgsi_text_translate(compute_shader_text, tokens, ARRAY_SIZE(tokens))) {
762       assert(0);
763       return NULL;
764    }
765 
766    struct pipe_compute_state state = {0};
767    state.ir_type = PIPE_SHADER_IR_TGSI;
768    state.prog = tokens;
769 
770    /* create compute shader */
771    return c->pipe->create_compute_state(c->pipe, &state);
772 }
773 
774 void
vl_compositor_cs_render(struct vl_compositor_state * s,struct vl_compositor * c,struct pipe_surface * dst_surface,struct u_rect * dirty_area,bool clear_dirty)775 vl_compositor_cs_render(struct vl_compositor_state *s,
776                         struct vl_compositor       *c,
777                         struct pipe_surface        *dst_surface,
778                         struct u_rect              *dirty_area,
779                         bool                        clear_dirty)
780 {
781    assert(c && s);
782    assert(dst_surface);
783 
784    c->fb_state.width = dst_surface->width;
785    c->fb_state.height = dst_surface->height;
786    c->fb_state.cbufs[0] = dst_surface;
787 
788    if (!s->scissor_valid) {
789       s->scissor.minx = 0;
790       s->scissor.miny = 0;
791       s->scissor.maxx = dst_surface->width;
792       s->scissor.maxy = dst_surface->height;
793    }
794 
795    if (clear_dirty && dirty_area &&
796        (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {
797 
798       c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
799                        0, 0, dst_surface->width, dst_surface->height, false);
800       dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
801       dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
802    }
803 
804    pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);
805 
806    draw_layers(c, s, dirty_area);
807 }
808 
vl_compositor_cs_init_shaders(struct vl_compositor * c)809 bool vl_compositor_cs_init_shaders(struct vl_compositor *c)
810 {
811         assert(c);
812 
813         c->cs_video_buffer = vl_compositor_cs_create_shader(c, compute_shader_video_buffer);
814         if (!c->cs_video_buffer) {
815                 debug_printf("Unable to create video_buffer compute shader.\n");
816                 return false;
817         }
818 
819         c->cs_weave_rgb = vl_compositor_cs_create_shader(c, compute_shader_weave);
820         if (!c->cs_weave_rgb) {
821                 debug_printf("Unable to create weave_rgb compute shader.\n");
822                 return false;
823         }
824 
825         c->cs_yuv.weave.y = vl_compositor_cs_create_shader(c, compute_shader_yuv_weave_y);
826         c->cs_yuv.weave.uv = vl_compositor_cs_create_shader(c, compute_shader_yuv_weave_uv);
827         c->cs_yuv.bob.y = vl_compositor_cs_create_shader(c, compute_shader_yuv_bob_y);
828         c->cs_yuv.bob.uv = vl_compositor_cs_create_shader(c, compute_shader_yuv_bob_uv);
829         if (!c->cs_yuv.weave.y || !c->cs_yuv.weave.uv ||
830             !c->cs_yuv.bob.y || !c->cs_yuv.bob.uv) {
831                 debug_printf("Unable to create YCbCr i-to-YCbCr p deint compute shader.\n");
832                 return false;
833         }
834 
835         return true;
836 }
837 
vl_compositor_cs_cleanup_shaders(struct vl_compositor * c)838 void vl_compositor_cs_cleanup_shaders(struct vl_compositor *c)
839 {
840         assert(c);
841 
842         if (c->cs_video_buffer)
843                 c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer);
844         if (c->cs_weave_rgb)
845                 c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb);
846         if (c->cs_yuv.weave.y)
847                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.y);
848         if (c->cs_yuv.weave.uv)
849                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.weave.uv);
850         if (c->cs_yuv.bob.y)
851                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.bob.y);
852         if (c->cs_yuv.bob.uv)
853                 c->pipe->delete_compute_state(c->pipe, c->cs_yuv.bob.uv);
854 }
855