1 /**************************************************************************
2  *
3  * Copyright 2007-2008 VMware, Inc.
4  * All Rights Reserved.
5  * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 #ifndef TGSI_EXEC_H
30 #define TGSI_EXEC_H
31 
32 #include "pipe/p_compiler.h"
33 #include "pipe/p_state.h"
34 #include "pipe/p_shader_tokens.h"
35 
36 #if defined __cplusplus
37 extern "C" {
38 #endif
39 
/* Channel indices. */
#define TGSI_CHAN_X 0
#define TGSI_CHAN_Y 1
#define TGSI_CHAN_Z 2
#define TGSI_CHAN_W 3

#define TGSI_NUM_CHANNELS 4  /* R,G,B,A */
#define TGSI_QUAD_SIZE    4  /* 4 pixel/quad */

/**
 * Loop header stepping the lvalue CHAN through 0 .. TGSI_NUM_CHANNELS-1.
 * CHAN is evaluated several times, so it must be free of side effects.
 * Every use is parenthesized so that an argument such as *p is advanced
 * as (*p)++ and not as *(p++).
 */
#define TGSI_FOR_EACH_CHANNEL( CHAN )\
   for ((CHAN) = 0; (CHAN) < TGSI_NUM_CHANNELS; (CHAN)++)

/** Non-zero when bit CHAN is set in the write mask of destination 0. */
#define TGSI_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

/**
 * Expands to a bare `if` header; the statement that follows the macro
 * becomes the body, so brace it when nesting inside another if/else.
 */
#define TGSI_IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (TGSI_IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/** Iterate CHAN over the channels enabled in destination 0's write mask. */
#define TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   TGSI_FOR_EACH_CHANNEL( CHAN )\
      TGSI_IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/** Non-zero when bit CHAN is set in the write mask of destination 1. */
#define TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[1].Register.WriteMask & (1 << (CHAN)))

/** Same shape as the DST0 variant: a bare `if` header for destination 1. */
#define TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\
   if (TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN ))

/** Iterate CHAN over the channels enabled in destination 1's write mask. */
#define TGSI_FOR_EACH_DST1_ENABLED_CHANNEL( INST, CHAN )\
   TGSI_FOR_EACH_CHANNEL( CHAN )\
      TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN )
70 
71 /**
72   * Registers may be treated as float, signed int or unsigned int.
73   */
74 union tgsi_exec_channel
75 {
76    float    f[TGSI_QUAD_SIZE];
77    int      i[TGSI_QUAD_SIZE];
78    unsigned u[TGSI_QUAD_SIZE];
79 } ALIGN16;
80 
81 /**
82   * A vector[RGBA] of channels[4 pixels]
83   */
84 struct ALIGN16 tgsi_exec_vector
85 {
86    union tgsi_exec_channel xyzw[TGSI_NUM_CHANNELS];
87 };
88 
89 /**
90  * For fragment programs, information for computing fragment input
91  * values from plane equation of the triangle/line.
92  */
93 struct tgsi_interp_coef
94 {
95    float a0[TGSI_NUM_CHANNELS];	/* in an xyzw layout */
96    float dadx[TGSI_NUM_CHANNELS];
97    float dady[TGSI_NUM_CHANNELS];
98 };
99 
/**
 * Mode selector passed as the `control` argument of the tgsi_sampler
 * callbacks.
 * NOTE(review): the per-value meaning is only suggested by the names
 * (LOD handling, explicit derivatives, gather) -- confirm in the
 * sampler implementation.
 */
enum tgsi_sampler_control
{
   TGSI_SAMPLER_LOD_NONE,
   TGSI_SAMPLER_LOD_BIAS,
   TGSI_SAMPLER_LOD_EXPLICIT,
   TGSI_SAMPLER_LOD_ZERO,
   TGSI_SAMPLER_DERIVS_EXPLICIT,
   TGSI_SAMPLER_GATHER,
};
109 
110 struct tgsi_image_params {
111    unsigned unit;
112    unsigned tgsi_tex_instr;
113    enum pipe_format format;
114    unsigned execmask;
115 };
116 
117 struct tgsi_image {
118    /* image interfaces */
119    void (*load)(const struct tgsi_image *image,
120                 const struct tgsi_image_params *params,
121                 const int s[TGSI_QUAD_SIZE],
122                 const int t[TGSI_QUAD_SIZE],
123                 const int r[TGSI_QUAD_SIZE],
124                 const int sample[TGSI_QUAD_SIZE],
125                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
126 
127    void (*store)(const struct tgsi_image *image,
128                  const struct tgsi_image_params *params,
129                  const int s[TGSI_QUAD_SIZE],
130                  const int t[TGSI_QUAD_SIZE],
131                  const int r[TGSI_QUAD_SIZE],
132                  const int sample[TGSI_QUAD_SIZE],
133                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
134 
135    void (*op)(const struct tgsi_image *image,
136               const struct tgsi_image_params *params,
137               enum tgsi_opcode opcode,
138               const int s[TGSI_QUAD_SIZE],
139               const int t[TGSI_QUAD_SIZE],
140               const int r[TGSI_QUAD_SIZE],
141               const int sample[TGSI_QUAD_SIZE],
142               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
143               float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
144 
145    void (*get_dims)(const struct tgsi_image *image,
146                     const struct tgsi_image_params *params,
147                     int dims[4]);
148 };
149 
/**
 * Parameters describing one buffer access.
 * NOTE(review): nothing in this header consumes the struct; field
 * meanings are inferred from their names -- confirm at the use site.
 */
struct tgsi_buffer_params {
   unsigned unit;
   unsigned execmask;
   unsigned writemask;
};
155 
/* SSBO interfaces */
struct tgsi_buffer {
   /* Look up the storage behind buffer slot `unit`; returns a pointer and
    * writes through `size`.
    * NOTE(review): presumably *size receives the byte size and the result
    * may be NULL for an unbound slot -- confirm in the implementation. */
   void *(*lookup)(const struct tgsi_buffer *buffer,
                   uint32_t unit, uint32_t *size);
};
161 
162 /**
163  * Information for sampling textures, which must be implemented
164  * by code outside the TGSI executor.
165  */
166 struct tgsi_sampler
167 {
168    /** Get samples for four fragments in a quad */
169    /* this interface contains 5 sets of channels that vary
170     * depending on the sampler.
171     * s - the first texture coordinate for sampling.
172     * t - the second texture coordinate for sampling - unused for 1D,
173           layer for 1D arrays.
174     * r - the third coordinate for sampling for 3D, cube, cube arrays,
175     *     layer for 2D arrays. Compare value for 1D/2D shadows.
176     * c0 - Compare value for shadow cube and shadow 2d arrays,
177     *      layer for cube arrays.
178     * derivs - explicit derivatives.
179     * offset - texel offsets
180     * lod - lod value, except for shadow cube arrays (compare value there).
181     */
182    void (*get_samples)(struct tgsi_sampler *sampler,
183                        const unsigned sview_index,
184                        const unsigned sampler_index,
185                        const float s[TGSI_QUAD_SIZE],
186                        const float t[TGSI_QUAD_SIZE],
187                        const float r[TGSI_QUAD_SIZE],
188                        const float c0[TGSI_QUAD_SIZE],
189                        const float c1[TGSI_QUAD_SIZE],
190                        float derivs[3][2][TGSI_QUAD_SIZE],
191                        const int8_t offset[3],
192                        enum tgsi_sampler_control control,
193                        float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
194    void (*get_dims)(struct tgsi_sampler *sampler,
195                     const unsigned sview_index,
196                     int level, int dims[4]);
197    void (*get_texel)(struct tgsi_sampler *sampler,
198                      const unsigned sview_index,
199                      const int i[TGSI_QUAD_SIZE],
200                      const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
201                      const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
202                      float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
203    void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,
204                      const unsigned sview_index,
205                      const unsigned sampler_index,
206                      const float s[TGSI_QUAD_SIZE],
207                      const float t[TGSI_QUAD_SIZE],
208                      const float p[TGSI_QUAD_SIZE],
209                      const float c0[TGSI_QUAD_SIZE],
210                      const enum tgsi_sampler_control control,
211                      float mipmap[TGSI_QUAD_SIZE],
212                      float lod[TGSI_QUAD_SIZE]);
213 };
214 
/* Number of entries in the executor's temporary register file. */
#define TGSI_EXEC_NUM_TEMPS       4096

/* Nesting limits; every construct shares the same depth. */
#define TGSI_EXEC_MAX_NESTING  32
#define TGSI_EXEC_MAX_COND_NESTING  TGSI_EXEC_MAX_NESTING
#define TGSI_EXEC_MAX_LOOP_NESTING  TGSI_EXEC_MAX_NESTING
#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING
#define TGSI_EXEC_MAX_CALL_NESTING  TGSI_EXEC_MAX_NESTING

/* The maximum number of input attributes per vertex. For 2D
 * input register files, this is the stride between two 1D
 * arrays.
 */
#define TGSI_EXEC_MAX_INPUT_ATTRIBS 32

/* The maximum number of bytes per constant buffer.
 * Note: the expression has type size_t because of the sizeof.
 */
#define TGSI_EXEC_MAX_CONST_BUFFER_SIZE  (4096 * sizeof(float[4]))

/* The maximum number of vertices per primitive */
#define TGSI_MAX_PRIM_VERTICES 6

/* The maximum number of primitives to be generated */
#define TGSI_MAX_PRIMITIVES 64

/* The maximum total number of vertices */
#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)

/* Size of the SystemValue array in the executor machine. */
#define TGSI_MAX_MISC_INPUTS 8

/* Number of vertex streams tracked for geometry shader output. */
#define TGSI_MAX_VERTEX_STREAMS 4
245 
246 /** function call/activation record */
247 struct tgsi_call_record
248 {
249    uint CondStackTop;
250    uint LoopStackTop;
251    uint ContStackTop;
252    int SwitchStackTop;
253    int BreakStackTop;
254    uint ReturnAddr;
255 };
256 
257 
258 /* Switch-case block state. */
259 struct tgsi_switch_record {
260    uint mask;                          /**< execution mask */
261    union tgsi_exec_channel selector;   /**< a value case statements are compared to */
262    uint defaultMask;                   /**< non-execute mask for default case */
263 };
264 
265 
/** Kind of construct a break currently applies to: a loop or a switch. */
enum tgsi_break_type {
   TGSI_EXEC_BREAK_INSIDE_LOOP,
   TGSI_EXEC_BREAK_INSIDE_SWITCH
};
270 
271 
/* Break-stack capacity: the loop and switch nesting limits added together. */
#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING)

/* Four floats. */
typedef float float4[4];

struct tgsi_exec_machine;

/**
 * Callback type that takes an attribute index, a channel index and an
 * x/y offset, with a channel pointer named out_chan.
 * NOTE(review): presumably re-evaluates the input at the offset sample
 * position and stores it in *out_chan -- confirm in the implementation.
 */
typedef void (* apply_sample_offset_func)(
   const struct tgsi_exec_machine *mach,
   unsigned attrib,
   unsigned chan,
   float ofs_x,
   float ofs_y,
   union tgsi_exec_channel *out_chan);
285 
286 /**
287  * Run-time virtual machine state for executing TGSI shader.
288  */
289 struct ALIGN16 tgsi_exec_machine
290 {
291    /* Total = program temporaries + internal temporaries
292     */
293    struct tgsi_exec_vector       Temps[TGSI_EXEC_NUM_TEMPS];
294 
295    unsigned                       ImmsReserved;
296    float4                         *Imms;
297 
298    struct tgsi_exec_vector       *Inputs;
299    struct tgsi_exec_vector       *Outputs;
300    apply_sample_offset_func           *InputSampleOffsetApply;
301 
302    /* System values */
303    unsigned                      SysSemanticToIndex[TGSI_SEMANTIC_COUNT];
304    struct tgsi_exec_vector       SystemValue[TGSI_MAX_MISC_INPUTS];
305 
306    struct tgsi_exec_vector       Addrs[3];
307 
308    struct tgsi_sampler           *Sampler;
309 
310    struct tgsi_image             *Image;
311    struct tgsi_buffer            *Buffer;
312    unsigned                      ImmLimit;
313 
314    const void *Consts[PIPE_MAX_CONSTANT_BUFFERS];
315    unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS];
316 
317    const struct tgsi_token       *Tokens;   /**< Declarations, instructions */
318    enum pipe_shader_type         ShaderType; /**< PIPE_SHADER_x */
319 
320    /* GEOMETRY processor only. */
321    /* Number of vertices emitted per emitted primitive. */
322    unsigned                      *Primitives[TGSI_MAX_VERTEX_STREAMS];
323    /* Offsets in ->Outputs of the primitives' vertex output data */
324    unsigned                      *PrimitiveOffsets[TGSI_MAX_VERTEX_STREAMS];
325    unsigned                       NumOutputs;
326    unsigned                       MaxOutputVertices;
327    /* Offset in ->Outputs for the current vertex to be emitted. */
328    unsigned                       OutputVertexOffset;
329    /* Number of primitives emitted. */
330    unsigned                       OutputPrimCount[TGSI_MAX_VERTEX_STREAMS];
331 
332    /* FRAGMENT processor only. */
333    const struct tgsi_interp_coef *InterpCoefs;
334    struct tgsi_exec_vector       QuadPos;
335    float                         Face;    /**< +1 if front facing, -1 if back facing */
336    bool                          flatshade_color;
337 
338    /* Compute Only */
339    void                          *LocalMem;
340    unsigned                      LocalMemSize;
341 
342    /* See GLSL 4.50 specification for definition of helper invocations */
343    uint NonHelperMask;  /**< non-helpers */
344    /* Conditional execution masks */
345    uint CondMask;  /**< For IF/ELSE/ENDIF */
346    uint LoopMask;  /**< For BGNLOOP/ENDLOOP */
347    uint ContMask;  /**< For loop CONT statements */
348    uint FuncMask;  /**< For function calls */
349    uint ExecMask;  /**< = CondMask & LoopMask */
350    uint KillMask;  /**< Mask of channels killed in the current shader execution */
351 
352    /* Current switch-case state. */
353    struct tgsi_switch_record Switch;
354 
355    /* Current break type. */
356    enum tgsi_break_type BreakType;
357 
358    /** Condition mask stack (for nested conditionals) */
359    uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
360    int CondStackTop;
361 
362    /** Loop mask stack (for nested loops) */
363    uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
364    int LoopStackTop;
365 
366    /** Loop label stack */
367    uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING];
368    int LoopLabelStackTop;
369 
370    /** Loop continue mask stack (see comments in tgsi_exec.c) */
371    uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
372    int ContStackTop;
373 
374    /** Switch case stack */
375    struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING];
376    int SwitchStackTop;
377 
378    enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK];
379    int BreakStackTop;
380 
381    /** Function execution mask stack (for executing subroutine code) */
382    uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
383    int FuncStackTop;
384 
385    /** Function call stack for saving/restoring the program counter */
386    struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING];
387    int CallStackTop;
388 
389    struct tgsi_full_instruction *Instructions;
390    uint NumInstructions;
391 
392    struct tgsi_full_declaration *Declarations;
393    uint NumDeclarations;
394 
395    struct tgsi_declaration_sampler_view
396       SamplerViews[PIPE_MAX_SHADER_SAMPLER_VIEWS];
397 
398    boolean UsedGeometryShader;
399 
400    int pc;
401 };
402 
403 struct tgsi_exec_machine *
404 tgsi_exec_machine_create(enum pipe_shader_type shader_type);
405 
406 void
407 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach);
408 
409 
410 void
411 tgsi_exec_machine_bind_shader(
412    struct tgsi_exec_machine *mach,
413    const struct tgsi_token *tokens,
414    struct tgsi_sampler *sampler,
415    struct tgsi_image *image,
416    struct tgsi_buffer *buffer);
417 
418 uint
419 tgsi_exec_machine_run(
420    struct tgsi_exec_machine *mach, int start_pc );
421 
422 
423 void
424 tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
425 
426 
427 extern void
428 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
429                                unsigned num_bufs,
430                                const void **bufs,
431                                const unsigned *buf_sizes);
432 
433 
434 static inline int
tgsi_exec_get_shader_param(enum pipe_shader_cap param)435 tgsi_exec_get_shader_param(enum pipe_shader_cap param)
436 {
437    switch(param) {
438    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
439    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
440    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
441    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
442       return INT_MAX;
443    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
444       return TGSI_EXEC_MAX_NESTING;
445    case PIPE_SHADER_CAP_MAX_INPUTS:
446       return TGSI_EXEC_MAX_INPUT_ATTRIBS;
447    case PIPE_SHADER_CAP_MAX_OUTPUTS:
448       return 32;
449    case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
450       return TGSI_EXEC_MAX_CONST_BUFFER_SIZE;
451    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
452       return PIPE_MAX_CONSTANT_BUFFERS;
453    case PIPE_SHADER_CAP_MAX_TEMPS:
454       return TGSI_EXEC_NUM_TEMPS;
455    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
456       return 1;
457    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
458    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
459    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
460    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
461       return 1;
462    case PIPE_SHADER_CAP_SUBROUTINES:
463       return 1;
464    case PIPE_SHADER_CAP_INTEGERS:
465       return 1;
466    case PIPE_SHADER_CAP_INT64_ATOMICS:
467    case PIPE_SHADER_CAP_FP16:
468    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
469    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
470    case PIPE_SHADER_CAP_INT16:
471    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
472       return 0;
473    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
474       return PIPE_MAX_SAMPLERS;
475    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
476       return PIPE_MAX_SHADER_SAMPLER_VIEWS;
477    case PIPE_SHADER_CAP_PREFERRED_IR:
478       return PIPE_SHADER_IR_TGSI;
479    case PIPE_SHADER_CAP_SUPPORTED_IRS:
480       return 1 << PIPE_SHADER_IR_TGSI;
481    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
482       return 1;
483    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
484    case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
485    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
486       return 1;
487    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
488    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
489    case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
490    case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
491    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
492    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
493       return 0;
494    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
495       return PIPE_MAX_SHADER_BUFFERS;
496    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
497       return PIPE_MAX_SHADER_IMAGES;
498 
499    case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
500       return 32;
501    }
502    /* if we get here, we missed a shader cap above (and should have seen
503     * a compiler warning.)
504     */
505    return 0;
506 }
507 
508 #if defined __cplusplus
509 } /* extern "C" */
510 #endif
511 
512 #endif /* TGSI_EXEC_H */
513