1 /*
2  * Copyright © 2018 Collabora Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "st_tgsi_lower_depth_clamp.h"
25 #include "tgsi/tgsi_transform.h"
26 #include "tgsi/tgsi_scan.h"
27 
28 struct tgsi_depth_clamp_transform {
29    struct tgsi_transform_context base;
30 
31    struct tgsi_shader_info info;
32 
33    int depth_range_const;
34    int next_generic;
35    int imm;
36    int depth_var;
37    int pos_input;
38    int pos_output;
39    int pos_input_temp;
40    int pos_output_temp;
41    int depth_range_corrected;
42    bool depth_clip_minus_one_to_one;
43 };
44 
/**
 * Recover the depth-clamp pass state from the generic transform context
 * handed to the callbacks.
 *
 * \param tctx  transform context embedded as `base` in the pass state
 * \return the same pointer, typed as struct tgsi_depth_clamp_transform
 */
static inline struct tgsi_depth_clamp_transform *
tgsi_depth_clamp_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_depth_clamp_transform *pass =
      (struct tgsi_depth_clamp_transform *)tctx;

   return pass;
}
50 
51 static void
transform_decl(struct tgsi_transform_context * tctx,struct tgsi_full_declaration * decl)52 transform_decl(struct tgsi_transform_context *tctx,
53                struct tgsi_full_declaration *decl)
54 {
55    struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
56 
57    /* find the next generic index usable for our inserted varying */
58    if (ctx->info.processor == PIPE_SHADER_FRAGMENT) {
59       if (decl->Declaration.File == TGSI_FILE_INPUT &&
60           decl->Semantic.Name == TGSI_SEMANTIC_GENERIC)
61          ctx->next_generic = MAX2(ctx->next_generic, decl->Semantic.Index + 1);
62    } else {
63       if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
64           decl->Semantic.Name == TGSI_SEMANTIC_GENERIC)
65          ctx->next_generic = MAX2(ctx->next_generic, decl->Semantic.Index + 1);
66    }
67 
68    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
69        decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
70       assert(decl->Semantic.Index == 0);
71       ctx->pos_output = decl->Range.First;
72    } else if (decl->Declaration.File == TGSI_FILE_INPUT &&
73               decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
74       assert(decl->Semantic.Index == 0);
75       if (ctx->info.processor == PIPE_SHADER_FRAGMENT)
76          ctx->pos_input = decl->Range.First;
77    }
78 
79    tctx->emit_declaration(tctx, decl);
80 }
81 
82 static void
prolog_common(struct tgsi_depth_clamp_transform * ctx)83 prolog_common(struct tgsi_depth_clamp_transform *ctx)
84 {
85    assert(ctx->depth_range_const >= 0);
86    if (ctx->info.const_file_max[0] < ctx->depth_range_const)
87       tgsi_transform_const_decl(&ctx->base, ctx->depth_range_const,
88                                 ctx->depth_range_const);
89 
90    /* declare a temp for the position-output */
91    ctx->pos_output_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
92    tgsi_transform_temp_decl(&ctx->base, ctx->pos_output_temp);
93 }
94 
95 static void
prolog_last_vertex_stage(struct tgsi_transform_context * tctx)96 prolog_last_vertex_stage(struct tgsi_transform_context *tctx)
97 {
98    struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
99 
100    prolog_common(ctx);
101 
102    ctx->imm = ctx->info.immediate_count;
103    tgsi_transform_immediate_decl(tctx, 0.5, 0.0, 0.0, 0.0);
104 
105    /* declare the output */
106    ctx->depth_var = ctx->info.num_outputs;
107    tgsi_transform_output_decl(tctx, ctx->depth_var,
108                               TGSI_SEMANTIC_GENERIC,
109                               ctx->next_generic,
110                               TGSI_INTERPOLATE_LINEAR);
111 }
112 
113 static void
epilog_last_vertex_stage(struct tgsi_transform_context * tctx)114 epilog_last_vertex_stage(struct tgsi_transform_context *tctx)
115 {
116    struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
117 
118    int mad_dst_file = TGSI_FILE_TEMPORARY;
119    int mad_dst_index = ctx->pos_output_temp;
120 
121    if (!ctx->depth_clip_minus_one_to_one) {
122       mad_dst_file = TGSI_FILE_OUTPUT;
123       mad_dst_index = ctx->depth_var;
124    }
125 
126    /* move from temp-register to output */
127    tgsi_transform_op1_inst(tctx, TGSI_OPCODE_MOV,
128                            TGSI_FILE_OUTPUT, ctx->pos_output,
129                            TGSI_WRITEMASK_XYZW,
130                            TGSI_FILE_TEMPORARY, ctx->pos_output_temp);
131 
132    /* Set gl_position.z to 0.0 to avoid clipping */
133    tgsi_transform_op1_swz_inst(tctx, TGSI_OPCODE_MOV,
134                                TGSI_FILE_OUTPUT, ctx->pos_output,
135                                TGSI_WRITEMASK_Z,
136                                TGSI_FILE_IMMEDIATE, ctx->imm,
137                                TGSI_SWIZZLE_Y);
138 
139    /* Evaluate and pass true depth value in depthRange terms */
140    /* z = gl_Position.z / gl_Position.w */
141 
142    struct tgsi_full_instruction inst;
143 
144    inst = tgsi_default_full_instruction();
145    inst.Instruction.Opcode = TGSI_OPCODE_DIV;
146    inst.Instruction.NumDstRegs = 1;
147    inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
148    inst.Dst[0].Register.Index = ctx->pos_output_temp;
149    inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
150    inst.Instruction.NumSrcRegs = 2;
151    tgsi_transform_src_reg_xyzw(&inst.Src[0], TGSI_FILE_TEMPORARY, ctx->pos_output_temp);
152    tgsi_transform_src_reg_xyzw(&inst.Src[1], TGSI_FILE_TEMPORARY, ctx->pos_output_temp);
153    inst.Src[0].Register.SwizzleX =
154          inst.Src[0].Register.SwizzleY =
155          inst.Src[0].Register.SwizzleZ =
156          inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Z;
157 
158    inst.Src[1].Register.SwizzleX =
159          inst.Src[1].Register.SwizzleY =
160          inst.Src[1].Register.SwizzleZ =
161          inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
162 
163    tctx->emit_instruction(tctx, &inst);
164 
165 
166    /* OpenGL Core Profile 4.5 - 13.6.1
167     * The vertex's windows z coordinate zw is given by zw = s * z + b.
168     *
169     * *  With clip control depth mode ZERO_TO_ONE
170     *      s = f - n, b = n, and hence
171     *
172     *     zw_0_1 = z * gl_DepthRange.diff + gl_DepthRange.near
173     */
174    tgsi_transform_op3_swz_inst(tctx, TGSI_OPCODE_MAD,
175                                mad_dst_file, mad_dst_index,
176                                TGSI_WRITEMASK_X,
177                                TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
178                                TGSI_SWIZZLE_X,
179                                false,
180                                TGSI_FILE_CONSTANT, ctx->depth_range_const,
181                                TGSI_SWIZZLE_Z,
182                                TGSI_FILE_CONSTANT, ctx->depth_range_const,
183                                TGSI_SWIZZLE_X);
184 
185    /* If clip control depth mode is NEGATIVE_ONE_TO_ONE, then
186    *     s = 0.5 * (f - n), b = 0.5 * (n + f), and hence
187    *
188    *     zw_m1_1 = 0.5 * (zw_01 + gl_DepthRange.far)
189    */
190    if (ctx->depth_clip_minus_one_to_one) {
191        /* z += gl_DepthRange.far */
192       tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_ADD,
193                                   TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
194                                   TGSI_WRITEMASK_X,
195                                   TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
196                                   TGSI_SWIZZLE_X,
197                                   TGSI_FILE_CONSTANT, ctx->depth_range_const,
198                                   TGSI_SWIZZLE_Y, false);
199       /* z *=  0.5 */
200       tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MUL,
201                                   TGSI_FILE_OUTPUT, ctx->depth_var,
202                                   TGSI_WRITEMASK_X,
203                                   TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
204                                   TGSI_SWIZZLE_X,
205                                   TGSI_FILE_IMMEDIATE, ctx->imm,
206                                   TGSI_SWIZZLE_X, false);
207    }
208 }
209 
210 
211 static void
prolog_fs(struct tgsi_transform_context * tctx)212 prolog_fs(struct tgsi_transform_context *tctx)
213 {
214    struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
215 
216    prolog_common(ctx);
217 
218    ctx->depth_range_corrected = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 2;
219    tgsi_transform_temp_decl(tctx, ctx->depth_range_corrected);
220 
221    /* declare the input */
222    ctx->depth_var = ctx->info.num_inputs;
223    tgsi_transform_input_decl(tctx, ctx->depth_var,
224                              TGSI_SEMANTIC_GENERIC,
225                              ctx->next_generic,
226                              TGSI_INTERPOLATE_LINEAR);
227 
228    /* declare the output */
229    if (ctx->pos_output < 0) {
230       ctx->pos_output = ctx->info.num_outputs;
231       tgsi_transform_output_decl(tctx, ctx->pos_output,
232                                  TGSI_SEMANTIC_POSITION,
233                                  0,
234                                  TGSI_INTERPOLATE_LINEAR);
235    }
236 
237    if (ctx->info.reads_z) {
238       ctx->pos_input_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 3;
239       tgsi_transform_temp_decl(tctx, ctx->pos_input_temp);
240 
241       assert(ctx->pos_input_temp >= 0);
242       /* copy normal position */
243       tgsi_transform_op1_inst(tctx, TGSI_OPCODE_MOV,
244                               TGSI_FILE_TEMPORARY, ctx->pos_input_temp,
245                               TGSI_WRITEMASK_XYZW,
246                               TGSI_FILE_INPUT, ctx->pos_input);
247       /* replace z-component with varying */
248       tgsi_transform_op1_swz_inst(tctx, TGSI_OPCODE_MOV,
249                                   TGSI_FILE_TEMPORARY, ctx->pos_input_temp,
250                                   TGSI_WRITEMASK_Z,
251                                   TGSI_FILE_INPUT, ctx->depth_var,
252                                   TGSI_SWIZZLE_X);
253    }
254 }
255 
256 static void
epilog_fs(struct tgsi_transform_context * tctx)257 epilog_fs(struct tgsi_transform_context *tctx)
258 {
259    struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
260 
261    unsigned src0_file = TGSI_FILE_INPUT;
262    unsigned src0_index = ctx->depth_var;
263    unsigned src0_swizzle = TGSI_SWIZZLE_X;
264 
265    if (ctx->info.writes_z) {
266       src0_file = TGSI_FILE_TEMPORARY;
267       src0_index = ctx->pos_output_temp;
268       src0_swizzle = TGSI_SWIZZLE_Z;
269    }
270 
271    /* it is possible to have gl_DepthRange.near > gl_DepthRange.far, so first
272     * we have to sort the two */
273    tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MIN,
274                                TGSI_FILE_TEMPORARY, ctx->depth_range_corrected,
275                                TGSI_WRITEMASK_X,
276                                TGSI_FILE_CONSTANT, ctx->depth_range_const,
277                                TGSI_SWIZZLE_X,
278                                TGSI_FILE_CONSTANT, ctx->depth_range_const,
279                                TGSI_SWIZZLE_Y,
280                                false);
281 
282    tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MAX,
283                                TGSI_FILE_TEMPORARY, ctx->depth_range_corrected,
284                                TGSI_WRITEMASK_Y,
285                                TGSI_FILE_CONSTANT, ctx->depth_range_const,
286                                TGSI_SWIZZLE_X,
287                                TGSI_FILE_CONSTANT, ctx->depth_range_const,
288                                TGSI_SWIZZLE_Y,
289                                false);
290 
291    /* gl_FragDepth = max(gl_FragDepth, min(gl_DepthRange.near, gl_DepthRange.far)) */
292    tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MAX,
293                                TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
294                                TGSI_WRITEMASK_X,
295                                src0_file, src0_index, src0_swizzle,
296                                TGSI_FILE_TEMPORARY, ctx->depth_range_corrected,
297                                TGSI_SWIZZLE_X, false);
298 
299    /* gl_FragDepth = min(gl_FragDepth, max(gl_DepthRange.near, gl_DepthRange.far)) */
300    tgsi_transform_op2_swz_inst(tctx, TGSI_OPCODE_MIN,
301                                TGSI_FILE_OUTPUT, ctx->pos_output,
302                                TGSI_WRITEMASK_Z,
303                                TGSI_FILE_TEMPORARY, ctx->pos_output_temp,
304                                TGSI_SWIZZLE_X,
305                                TGSI_FILE_TEMPORARY, ctx->depth_range_corrected,
306                                TGSI_SWIZZLE_Y, false);
307 }
308 
309 static void
transform_instr(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)310 transform_instr(struct tgsi_transform_context *tctx,
311                 struct tgsi_full_instruction *inst)
312 {
313    struct tgsi_depth_clamp_transform *ctx = tgsi_depth_clamp_transform(tctx);
314 
315    if (ctx->pos_output >= 0) {
316       /* replace writes to gl_Position / gl_FragDepth with a temp-variable
317        */
318       for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
319          if (inst->Dst[i].Register.File == TGSI_FILE_OUTPUT &&
320              inst->Dst[i].Register.Index == ctx->pos_output) {
321             inst->Dst[i].Register.File = TGSI_FILE_TEMPORARY;
322             inst->Dst[i].Register.Index = ctx->pos_output_temp;
323          }
324       }
325    }
326 
327    if (ctx->info.reads_z) {
328       /* replace reads from gl_FragCoord with temp-variable
329        */
330       assert(ctx->pos_input_temp >= 0);
331       for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
332          if (inst->Src[i].Register.File == TGSI_FILE_INPUT &&
333              inst->Src[i].Register.Index == ctx->pos_input) {
334             inst->Src[i].Register.File = TGSI_FILE_TEMPORARY;
335             inst->Src[i].Register.Index = ctx->pos_input_temp;
336          }
337       }
338    }
339 
340    /* In a GS each we have to add the z-write opilog for each emit
341     */
342    if (ctx->info.processor == PIPE_SHADER_GEOMETRY &&
343        inst->Instruction.Opcode == TGSI_OPCODE_EMIT)
344       epilog_last_vertex_stage(tctx);
345 
346    tctx->emit_instruction(tctx, inst);
347 }
348 
349 const struct tgsi_token *
st_tgsi_lower_depth_clamp(const struct tgsi_token * tokens,int depth_range_const,bool clip_negative_one_to_one)350 st_tgsi_lower_depth_clamp(const struct tgsi_token *tokens,
351                           int depth_range_const,
352                           bool clip_negative_one_to_one)
353 {
354    struct tgsi_depth_clamp_transform ctx;
355    struct tgsi_token *newtoks;
356    int newlen;
357 
358    memset(&ctx, 0, sizeof(ctx));
359    tgsi_scan_shader(tokens, &ctx.info);
360 
361    /* we only want to do this for the fragment shader, and the shader-stage
362     * right before it, but in the first pass there might be no "next" shader
363     */
364    if (ctx.info.processor != PIPE_SHADER_FRAGMENT &&
365        ctx.info.processor != PIPE_SHADER_GEOMETRY &&
366        ctx.info.processor != PIPE_SHADER_VERTEX &&
367        ctx.info.processor != PIPE_SHADER_TESS_EVAL &&
368        (ctx.info.properties[TGSI_PROPERTY_NEXT_SHADER] > PIPE_SHADER_VERTEX &&
369        (ctx.info.properties[TGSI_PROPERTY_NEXT_SHADER] != PIPE_SHADER_FRAGMENT)))  {
370       return tokens;
371    }
372 
373    ctx.base.transform_declaration = transform_decl;
374    ctx.base.transform_instruction = transform_instr;
375 
376    if (ctx.info.processor == PIPE_SHADER_FRAGMENT) {
377       ctx.base.prolog = prolog_fs;
378       ctx.base.epilog = epilog_fs;
379    } else {
380       ctx.base.prolog = prolog_last_vertex_stage;
381       ctx.base.epilog = epilog_last_vertex_stage;
382    }
383 
384    ctx.pos_output = ctx.pos_input = -1;
385    ctx.depth_range_const = depth_range_const;
386    ctx.depth_clip_minus_one_to_one = clip_negative_one_to_one;
387 
388    /* We add approximately 30 tokens per Z write, so add this per vertex in
389     * a GS and some additional tokes for VS and TES
390     */
391    newlen = tgsi_num_tokens(tokens) +
392             30 * ctx.info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] +
393             120;
394 
395    newtoks = tgsi_alloc_tokens(newlen);
396    if (!newtoks)
397       return tokens;
398 
399    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
400 
401    return newtoks;
402 }
403 
/**
 * Convenience entry point that runs the depth-clamp lowering with the
 * [-1, 1] clip-range flag cleared.
 *
 * \param tokens             shader to transform
 * \param depth_range_const  index of the constant holding the depth range
 * \return whatever st_tgsi_lower_depth_clamp() returns for these arguments
 */
const struct tgsi_token *
st_tgsi_lower_depth_clamp_fs(const struct tgsi_token *tokens,
                             int depth_range_const)
{
   const bool clip_negative_one_to_one = false;

   return st_tgsi_lower_depth_clamp(tokens, depth_range_const,
                                    clip_negative_one_to_one);
}
410