1 /**************************************************************************
2  *
3  * Copyright 2007-2008 VMware, Inc.
4  * All Rights Reserved.
5  * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * TGSI interpreter/executor.
31  *
32  * Flow control information:
33  *
34  * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35  * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36  * care since a condition may be true for some quad components but false
37  * for other components.
38  *
39  * We basically execute all statements (even if they're in the part of
40  * an IF/ELSE clause that's "not taken") and use a special mask to
41  * control writing to destination registers.  This is the ExecMask.
42  * See store_dest().
43  *
 * The ExecMask is computed from three other masks (CondMask, LoopMask and
 * ContMask) which are controlled by the flow control instructions (namely:
 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47  *
48  *
49  * Authors:
50  *   Michal Krol
51  *   Brian Paul
52  */
53 
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_half.h"
62 #include "util/u_memory.h"
63 #include "util/u_math.h"
64 #include "util/rounding.h"
65 
66 
/* Debug switch; not referenced in this part of the file.
 * NOTE(review): presumably enables execution tracing -- confirm at its
 * use sites.
 */
#define DEBUG_EXECUTION 0


/* When non-zero, micro_exp2() and micro_lg2() call the util_fast_*()
 * helpers instead of powf()/logf().
 */
#define FAST_MATH 0

/* Indices of the four elements of a quad, as used by the micro_ddx*() and
 * micro_ddy*() derivative helpers.
 */
#define TILE_TOP_LEFT     0
#define TILE_TOP_RIGHT    1
#define TILE_BOTTOM_LEFT  2
#define TILE_BOTTOM_RIGHT 3
76 
/**
 * One 64-bit-wide channel holding TGSI_QUAD_SIZE elements.  All members
 * overlay the same storage, so an element can be accessed as a double, as
 * a pair of unsigned words, or as an unsigned/signed 64-bit integer.
 */
union tgsi_double_channel {
   /* double-precision view */
   double d[TGSI_QUAD_SIZE];
   /* two unsigned words per element; the comparison helpers write their
    * result mask to word [0] only */
   unsigned u[TGSI_QUAD_SIZE][2];
   /* unsigned 64-bit integer view */
   uint64_t u64[TGSI_QUAD_SIZE];
   /* signed 64-bit integer view */
   int64_t i64[TGSI_QUAD_SIZE];
};
83 
/**
 * A pair of 64-bit channels.
 * NOTE(review): presumably xy/zw are the 64-bit values assembled from the
 * x/y and z/w 32-bit register channels -- confirm against the users of
 * this struct.
 */
struct tgsi_double_vector {
   union tgsi_double_channel xy;
   union tgsi_double_channel zw;
};
88 
89 static void
micro_abs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)90 micro_abs(union tgsi_exec_channel *dst,
91           const union tgsi_exec_channel *src)
92 {
93    dst->f[0] = fabsf(src->f[0]);
94    dst->f[1] = fabsf(src->f[1]);
95    dst->f[2] = fabsf(src->f[2]);
96    dst->f[3] = fabsf(src->f[3]);
97 }
98 
99 static void
micro_arl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)100 micro_arl(union tgsi_exec_channel *dst,
101           const union tgsi_exec_channel *src)
102 {
103    dst->i[0] = (int)floorf(src->f[0]);
104    dst->i[1] = (int)floorf(src->f[1]);
105    dst->i[2] = (int)floorf(src->f[2]);
106    dst->i[3] = (int)floorf(src->f[3]);
107 }
108 
109 static void
micro_arr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)110 micro_arr(union tgsi_exec_channel *dst,
111           const union tgsi_exec_channel *src)
112 {
113    dst->i[0] = (int)floorf(src->f[0] + 0.5f);
114    dst->i[1] = (int)floorf(src->f[1] + 0.5f);
115    dst->i[2] = (int)floorf(src->f[2] + 0.5f);
116    dst->i[3] = (int)floorf(src->f[3] + 0.5f);
117 }
118 
119 static void
micro_ceil(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)120 micro_ceil(union tgsi_exec_channel *dst,
121            const union tgsi_exec_channel *src)
122 {
123    dst->f[0] = ceilf(src->f[0]);
124    dst->f[1] = ceilf(src->f[1]);
125    dst->f[2] = ceilf(src->f[2]);
126    dst->f[3] = ceilf(src->f[3]);
127 }
128 
129 static void
micro_cmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)130 micro_cmp(union tgsi_exec_channel *dst,
131           const union tgsi_exec_channel *src0,
132           const union tgsi_exec_channel *src1,
133           const union tgsi_exec_channel *src2)
134 {
135    dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
136    dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
137    dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
138    dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
139 }
140 
141 static void
micro_cos(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)142 micro_cos(union tgsi_exec_channel *dst,
143           const union tgsi_exec_channel *src)
144 {
145    dst->f[0] = cosf(src->f[0]);
146    dst->f[1] = cosf(src->f[1]);
147    dst->f[2] = cosf(src->f[2]);
148    dst->f[3] = cosf(src->f[3]);
149 }
150 
151 static void
micro_d2f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)152 micro_d2f(union tgsi_exec_channel *dst,
153           const union tgsi_double_channel *src)
154 {
155    dst->f[0] = (float)src->d[0];
156    dst->f[1] = (float)src->d[1];
157    dst->f[2] = (float)src->d[2];
158    dst->f[3] = (float)src->d[3];
159 }
160 
161 static void
micro_d2i(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)162 micro_d2i(union tgsi_exec_channel *dst,
163           const union tgsi_double_channel *src)
164 {
165    dst->i[0] = (int)src->d[0];
166    dst->i[1] = (int)src->d[1];
167    dst->i[2] = (int)src->d[2];
168    dst->i[3] = (int)src->d[3];
169 }
170 
171 static void
micro_d2u(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)172 micro_d2u(union tgsi_exec_channel *dst,
173           const union tgsi_double_channel *src)
174 {
175    dst->u[0] = (unsigned)src->d[0];
176    dst->u[1] = (unsigned)src->d[1];
177    dst->u[2] = (unsigned)src->d[2];
178    dst->u[3] = (unsigned)src->d[3];
179 }
180 static void
micro_dabs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)181 micro_dabs(union tgsi_double_channel *dst,
182            const union tgsi_double_channel *src)
183 {
184    dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
185    dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
186    dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
187    dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
188 }
189 
190 static void
micro_dadd(union tgsi_double_channel * dst,const union tgsi_double_channel * src)191 micro_dadd(union tgsi_double_channel *dst,
192           const union tgsi_double_channel *src)
193 {
194    dst->d[0] = src[0].d[0] + src[1].d[0];
195    dst->d[1] = src[0].d[1] + src[1].d[1];
196    dst->d[2] = src[0].d[2] + src[1].d[2];
197    dst->d[3] = src[0].d[3] + src[1].d[3];
198 }
199 
200 static void
micro_ddiv(union tgsi_double_channel * dst,const union tgsi_double_channel * src)201 micro_ddiv(union tgsi_double_channel *dst,
202           const union tgsi_double_channel *src)
203 {
204    dst->d[0] = src[0].d[0] / src[1].d[0];
205    dst->d[1] = src[0].d[1] / src[1].d[1];
206    dst->d[2] = src[0].d[2] / src[1].d[2];
207    dst->d[3] = src[0].d[3] / src[1].d[3];
208 }
209 
210 static void
micro_ddx(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)211 micro_ddx(union tgsi_exec_channel *dst,
212           const union tgsi_exec_channel *src)
213 {
214    dst->f[0] =
215    dst->f[1] =
216    dst->f[2] =
217    dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
218 }
219 
220 static void
micro_ddx_fine(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)221 micro_ddx_fine(union tgsi_exec_channel *dst,
222           const union tgsi_exec_channel *src)
223 {
224    dst->f[0] =
225    dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];
226    dst->f[2] =
227    dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
228 }
229 
230 
231 static void
micro_ddy(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)232 micro_ddy(union tgsi_exec_channel *dst,
233           const union tgsi_exec_channel *src)
234 {
235    dst->f[0] =
236    dst->f[1] =
237    dst->f[2] =
238    dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
239 }
240 
241 static void
micro_ddy_fine(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)242 micro_ddy_fine(union tgsi_exec_channel *dst,
243           const union tgsi_exec_channel *src)
244 {
245    dst->f[0] =
246    dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
247    dst->f[1] =
248    dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];
249 }
250 
251 static void
micro_dmul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)252 micro_dmul(union tgsi_double_channel *dst,
253            const union tgsi_double_channel *src)
254 {
255    dst->d[0] = src[0].d[0] * src[1].d[0];
256    dst->d[1] = src[0].d[1] * src[1].d[1];
257    dst->d[2] = src[0].d[2] * src[1].d[2];
258    dst->d[3] = src[0].d[3] * src[1].d[3];
259 }
260 
261 static void
micro_dmax(union tgsi_double_channel * dst,const union tgsi_double_channel * src)262 micro_dmax(union tgsi_double_channel *dst,
263            const union tgsi_double_channel *src)
264 {
265    dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0];
266    dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1];
267    dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2];
268    dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3];
269 }
270 
271 static void
micro_dmin(union tgsi_double_channel * dst,const union tgsi_double_channel * src)272 micro_dmin(union tgsi_double_channel *dst,
273            const union tgsi_double_channel *src)
274 {
275    dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0];
276    dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1];
277    dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2];
278    dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3];
279 }
280 
281 static void
micro_dneg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)282 micro_dneg(union tgsi_double_channel *dst,
283            const union tgsi_double_channel *src)
284 {
285    dst->d[0] = -src->d[0];
286    dst->d[1] = -src->d[1];
287    dst->d[2] = -src->d[2];
288    dst->d[3] = -src->d[3];
289 }
290 
291 static void
micro_dslt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)292 micro_dslt(union tgsi_double_channel *dst,
293            const union tgsi_double_channel *src)
294 {
295    dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
296    dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
297    dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
298    dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
299 }
300 
301 static void
micro_dsne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)302 micro_dsne(union tgsi_double_channel *dst,
303            const union tgsi_double_channel *src)
304 {
305    dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
306    dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
307    dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
308    dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
309 }
310 
311 static void
micro_dsge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)312 micro_dsge(union tgsi_double_channel *dst,
313            const union tgsi_double_channel *src)
314 {
315    dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
316    dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
317    dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
318    dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
319 }
320 
321 static void
micro_dseq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)322 micro_dseq(union tgsi_double_channel *dst,
323            const union tgsi_double_channel *src)
324 {
325    dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
326    dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
327    dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
328    dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
329 }
330 
331 static void
micro_drcp(union tgsi_double_channel * dst,const union tgsi_double_channel * src)332 micro_drcp(union tgsi_double_channel *dst,
333            const union tgsi_double_channel *src)
334 {
335    dst->d[0] = 1.0 / src->d[0];
336    dst->d[1] = 1.0 / src->d[1];
337    dst->d[2] = 1.0 / src->d[2];
338    dst->d[3] = 1.0 / src->d[3];
339 }
340 
341 static void
micro_dsqrt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)342 micro_dsqrt(union tgsi_double_channel *dst,
343             const union tgsi_double_channel *src)
344 {
345    dst->d[0] = sqrt(src->d[0]);
346    dst->d[1] = sqrt(src->d[1]);
347    dst->d[2] = sqrt(src->d[2]);
348    dst->d[3] = sqrt(src->d[3]);
349 }
350 
351 static void
micro_drsq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)352 micro_drsq(union tgsi_double_channel *dst,
353           const union tgsi_double_channel *src)
354 {
355    dst->d[0] = 1.0 / sqrt(src->d[0]);
356    dst->d[1] = 1.0 / sqrt(src->d[1]);
357    dst->d[2] = 1.0 / sqrt(src->d[2]);
358    dst->d[3] = 1.0 / sqrt(src->d[3]);
359 }
360 
361 static void
micro_dmad(union tgsi_double_channel * dst,const union tgsi_double_channel * src)362 micro_dmad(union tgsi_double_channel *dst,
363            const union tgsi_double_channel *src)
364 {
365    dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
366    dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
367    dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
368    dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
369 }
370 
371 static void
micro_dfrac(union tgsi_double_channel * dst,const union tgsi_double_channel * src)372 micro_dfrac(union tgsi_double_channel *dst,
373             const union tgsi_double_channel *src)
374 {
375    dst->d[0] = src->d[0] - floor(src->d[0]);
376    dst->d[1] = src->d[1] - floor(src->d[1]);
377    dst->d[2] = src->d[2] - floor(src->d[2]);
378    dst->d[3] = src->d[3] - floor(src->d[3]);
379 }
380 
381 static void
micro_dldexp(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)382 micro_dldexp(union tgsi_double_channel *dst,
383              const union tgsi_double_channel *src0,
384              union tgsi_exec_channel *src1)
385 {
386    dst->d[0] = ldexp(src0->d[0], src1->i[0]);
387    dst->d[1] = ldexp(src0->d[1], src1->i[1]);
388    dst->d[2] = ldexp(src0->d[2], src1->i[2]);
389    dst->d[3] = ldexp(src0->d[3], src1->i[3]);
390 }
391 
392 static void
micro_dfracexp(union tgsi_double_channel * dst,union tgsi_exec_channel * dst_exp,const union tgsi_double_channel * src)393 micro_dfracexp(union tgsi_double_channel *dst,
394                union tgsi_exec_channel *dst_exp,
395                const union tgsi_double_channel *src)
396 {
397    dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
398    dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
399    dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
400    dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
401 }
402 
403 static void
micro_exp2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)404 micro_exp2(union tgsi_exec_channel *dst,
405            const union tgsi_exec_channel *src)
406 {
407 #if FAST_MATH
408    dst->f[0] = util_fast_exp2(src->f[0]);
409    dst->f[1] = util_fast_exp2(src->f[1]);
410    dst->f[2] = util_fast_exp2(src->f[2]);
411    dst->f[3] = util_fast_exp2(src->f[3]);
412 #else
413 #if DEBUG
414    /* Inf is okay for this instruction, so clamp it to silence assertions. */
415    uint i;
416    union tgsi_exec_channel clamped;
417 
418    for (i = 0; i < 4; i++) {
419       if (src->f[i] > 127.99999f) {
420          clamped.f[i] = 127.99999f;
421       } else if (src->f[i] < -126.99999f) {
422          clamped.f[i] = -126.99999f;
423       } else {
424          clamped.f[i] = src->f[i];
425       }
426    }
427    src = &clamped;
428 #endif /* DEBUG */
429 
430    dst->f[0] = powf(2.0f, src->f[0]);
431    dst->f[1] = powf(2.0f, src->f[1]);
432    dst->f[2] = powf(2.0f, src->f[2]);
433    dst->f[3] = powf(2.0f, src->f[3]);
434 #endif /* FAST_MATH */
435 }
436 
437 static void
micro_f2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)438 micro_f2d(union tgsi_double_channel *dst,
439           const union tgsi_exec_channel *src)
440 {
441    dst->d[0] = (double)src->f[0];
442    dst->d[1] = (double)src->f[1];
443    dst->d[2] = (double)src->f[2];
444    dst->d[3] = (double)src->f[3];
445 }
446 
447 static void
micro_flr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)448 micro_flr(union tgsi_exec_channel *dst,
449           const union tgsi_exec_channel *src)
450 {
451    dst->f[0] = floorf(src->f[0]);
452    dst->f[1] = floorf(src->f[1]);
453    dst->f[2] = floorf(src->f[2]);
454    dst->f[3] = floorf(src->f[3]);
455 }
456 
457 static void
micro_frc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)458 micro_frc(union tgsi_exec_channel *dst,
459           const union tgsi_exec_channel *src)
460 {
461    dst->f[0] = src->f[0] - floorf(src->f[0]);
462    dst->f[1] = src->f[1] - floorf(src->f[1]);
463    dst->f[2] = src->f[2] - floorf(src->f[2]);
464    dst->f[3] = src->f[3] - floorf(src->f[3]);
465 }
466 
467 static void
micro_i2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)468 micro_i2d(union tgsi_double_channel *dst,
469           const union tgsi_exec_channel *src)
470 {
471    dst->d[0] = (double)src->i[0];
472    dst->d[1] = (double)src->i[1];
473    dst->d[2] = (double)src->i[2];
474    dst->d[3] = (double)src->i[3];
475 }
476 
477 static void
micro_iabs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)478 micro_iabs(union tgsi_exec_channel *dst,
479            const union tgsi_exec_channel *src)
480 {
481    dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
482    dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
483    dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
484    dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
485 }
486 
487 static void
micro_ineg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)488 micro_ineg(union tgsi_exec_channel *dst,
489            const union tgsi_exec_channel *src)
490 {
491    dst->i[0] = -src->i[0];
492    dst->i[1] = -src->i[1];
493    dst->i[2] = -src->i[2];
494    dst->i[3] = -src->i[3];
495 }
496 
497 static void
micro_lg2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)498 micro_lg2(union tgsi_exec_channel *dst,
499           const union tgsi_exec_channel *src)
500 {
501 #if FAST_MATH
502    dst->f[0] = util_fast_log2(src->f[0]);
503    dst->f[1] = util_fast_log2(src->f[1]);
504    dst->f[2] = util_fast_log2(src->f[2]);
505    dst->f[3] = util_fast_log2(src->f[3]);
506 #else
507    dst->f[0] = logf(src->f[0]) * 1.442695f;
508    dst->f[1] = logf(src->f[1]) * 1.442695f;
509    dst->f[2] = logf(src->f[2]) * 1.442695f;
510    dst->f[3] = logf(src->f[3]) * 1.442695f;
511 #endif
512 }
513 
514 static void
micro_lrp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)515 micro_lrp(union tgsi_exec_channel *dst,
516           const union tgsi_exec_channel *src0,
517           const union tgsi_exec_channel *src1,
518           const union tgsi_exec_channel *src2)
519 {
520    dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
521    dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
522    dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
523    dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
524 }
525 
526 static void
micro_mad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)527 micro_mad(union tgsi_exec_channel *dst,
528           const union tgsi_exec_channel *src0,
529           const union tgsi_exec_channel *src1,
530           const union tgsi_exec_channel *src2)
531 {
532    dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
533    dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
534    dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
535    dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
536 }
537 
538 static void
micro_mov(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)539 micro_mov(union tgsi_exec_channel *dst,
540           const union tgsi_exec_channel *src)
541 {
542    dst->u[0] = src->u[0];
543    dst->u[1] = src->u[1];
544    dst->u[2] = src->u[2];
545    dst->u[3] = src->u[3];
546 }
547 
548 static void
micro_rcp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)549 micro_rcp(union tgsi_exec_channel *dst,
550           const union tgsi_exec_channel *src)
551 {
552 #if 0 /* for debugging */
553    assert(src->f[0] != 0.0f);
554    assert(src->f[1] != 0.0f);
555    assert(src->f[2] != 0.0f);
556    assert(src->f[3] != 0.0f);
557 #endif
558    dst->f[0] = 1.0f / src->f[0];
559    dst->f[1] = 1.0f / src->f[1];
560    dst->f[2] = 1.0f / src->f[2];
561    dst->f[3] = 1.0f / src->f[3];
562 }
563 
564 static void
micro_rnd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)565 micro_rnd(union tgsi_exec_channel *dst,
566           const union tgsi_exec_channel *src)
567 {
568    dst->f[0] = _mesa_roundevenf(src->f[0]);
569    dst->f[1] = _mesa_roundevenf(src->f[1]);
570    dst->f[2] = _mesa_roundevenf(src->f[2]);
571    dst->f[3] = _mesa_roundevenf(src->f[3]);
572 }
573 
574 static void
micro_rsq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)575 micro_rsq(union tgsi_exec_channel *dst,
576           const union tgsi_exec_channel *src)
577 {
578 #if 0 /* for debugging */
579    assert(src->f[0] != 0.0f);
580    assert(src->f[1] != 0.0f);
581    assert(src->f[2] != 0.0f);
582    assert(src->f[3] != 0.0f);
583 #endif
584    dst->f[0] = 1.0f / sqrtf(src->f[0]);
585    dst->f[1] = 1.0f / sqrtf(src->f[1]);
586    dst->f[2] = 1.0f / sqrtf(src->f[2]);
587    dst->f[3] = 1.0f / sqrtf(src->f[3]);
588 }
589 
590 static void
micro_sqrt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)591 micro_sqrt(union tgsi_exec_channel *dst,
592            const union tgsi_exec_channel *src)
593 {
594    dst->f[0] = sqrtf(src->f[0]);
595    dst->f[1] = sqrtf(src->f[1]);
596    dst->f[2] = sqrtf(src->f[2]);
597    dst->f[3] = sqrtf(src->f[3]);
598 }
599 
600 static void
micro_seq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)601 micro_seq(union tgsi_exec_channel *dst,
602           const union tgsi_exec_channel *src0,
603           const union tgsi_exec_channel *src1)
604 {
605    dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
606    dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
607    dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
608    dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
609 }
610 
611 static void
micro_sge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)612 micro_sge(union tgsi_exec_channel *dst,
613           const union tgsi_exec_channel *src0,
614           const union tgsi_exec_channel *src1)
615 {
616    dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
617    dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
618    dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
619    dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
620 }
621 
622 static void
micro_sgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)623 micro_sgn(union tgsi_exec_channel *dst,
624           const union tgsi_exec_channel *src)
625 {
626    dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
627    dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
628    dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
629    dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
630 }
631 
632 static void
micro_isgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)633 micro_isgn(union tgsi_exec_channel *dst,
634           const union tgsi_exec_channel *src)
635 {
636    dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
637    dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
638    dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
639    dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
640 }
641 
642 static void
micro_sgt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)643 micro_sgt(union tgsi_exec_channel *dst,
644           const union tgsi_exec_channel *src0,
645           const union tgsi_exec_channel *src1)
646 {
647    dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
648    dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
649    dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
650    dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
651 }
652 
653 static void
micro_sin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)654 micro_sin(union tgsi_exec_channel *dst,
655           const union tgsi_exec_channel *src)
656 {
657    dst->f[0] = sinf(src->f[0]);
658    dst->f[1] = sinf(src->f[1]);
659    dst->f[2] = sinf(src->f[2]);
660    dst->f[3] = sinf(src->f[3]);
661 }
662 
663 static void
micro_sle(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)664 micro_sle(union tgsi_exec_channel *dst,
665           const union tgsi_exec_channel *src0,
666           const union tgsi_exec_channel *src1)
667 {
668    dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
669    dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
670    dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
671    dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
672 }
673 
674 static void
micro_slt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)675 micro_slt(union tgsi_exec_channel *dst,
676           const union tgsi_exec_channel *src0,
677           const union tgsi_exec_channel *src1)
678 {
679    dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
680    dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
681    dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
682    dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
683 }
684 
685 static void
micro_sne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)686 micro_sne(union tgsi_exec_channel *dst,
687           const union tgsi_exec_channel *src0,
688           const union tgsi_exec_channel *src1)
689 {
690    dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
691    dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
692    dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
693    dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
694 }
695 
696 static void
micro_trunc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)697 micro_trunc(union tgsi_exec_channel *dst,
698             const union tgsi_exec_channel *src)
699 {
700    dst->f[0] = truncf(src->f[0]);
701    dst->f[1] = truncf(src->f[1]);
702    dst->f[2] = truncf(src->f[2]);
703    dst->f[3] = truncf(src->f[3]);
704 }
705 
706 static void
micro_u2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)707 micro_u2d(union tgsi_double_channel *dst,
708           const union tgsi_exec_channel *src)
709 {
710    dst->d[0] = (double)src->u[0];
711    dst->d[1] = (double)src->u[1];
712    dst->d[2] = (double)src->u[2];
713    dst->d[3] = (double)src->u[3];
714 }
715 
716 static void
micro_i64abs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)717 micro_i64abs(union tgsi_double_channel *dst,
718              const union tgsi_double_channel *src)
719 {
720    dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
721    dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
722    dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
723    dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
724 }
725 
726 static void
micro_i64sgn(union tgsi_double_channel * dst,const union tgsi_double_channel * src)727 micro_i64sgn(union tgsi_double_channel *dst,
728              const union tgsi_double_channel *src)
729 {
730    dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
731    dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
732    dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
733    dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
734 }
735 
736 static void
micro_i64neg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)737 micro_i64neg(union tgsi_double_channel *dst,
738              const union tgsi_double_channel *src)
739 {
740    dst->i64[0] = -src->i64[0];
741    dst->i64[1] = -src->i64[1];
742    dst->i64[2] = -src->i64[2];
743    dst->i64[3] = -src->i64[3];
744 }
745 
746 static void
micro_u64seq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)747 micro_u64seq(union tgsi_double_channel *dst,
748            const union tgsi_double_channel *src)
749 {
750    dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
751    dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
752    dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
753    dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
754 }
755 
756 static void
micro_u64sne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)757 micro_u64sne(union tgsi_double_channel *dst,
758              const union tgsi_double_channel *src)
759 {
760    dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
761    dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
762    dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
763    dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
764 }
765 
766 static void
micro_i64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)767 micro_i64slt(union tgsi_double_channel *dst,
768              const union tgsi_double_channel *src)
769 {
770    dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
771    dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
772    dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
773    dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
774 }
775 
776 static void
micro_u64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)777 micro_u64slt(union tgsi_double_channel *dst,
778              const union tgsi_double_channel *src)
779 {
780    dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
781    dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
782    dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
783    dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
784 }
785 
786 static void
micro_i64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)787 micro_i64sge(union tgsi_double_channel *dst,
788            const union tgsi_double_channel *src)
789 {
790    dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
791    dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
792    dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
793    dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
794 }
795 
796 static void
micro_u64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)797 micro_u64sge(union tgsi_double_channel *dst,
798              const union tgsi_double_channel *src)
799 {
800    dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
801    dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
802    dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
803    dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
804 }
805 
806 static void
micro_u64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)807 micro_u64max(union tgsi_double_channel *dst,
808              const union tgsi_double_channel *src)
809 {
810    dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
811    dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
812    dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
813    dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
814 }
815 
816 static void
micro_i64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)817 micro_i64max(union tgsi_double_channel *dst,
818              const union tgsi_double_channel *src)
819 {
820    dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
821    dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
822    dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
823    dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
824 }
825 
826 static void
micro_u64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)827 micro_u64min(union tgsi_double_channel *dst,
828              const union tgsi_double_channel *src)
829 {
830    dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
831    dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
832    dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
833    dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
834 }
835 
836 static void
micro_i64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)837 micro_i64min(union tgsi_double_channel *dst,
838              const union tgsi_double_channel *src)
839 {
840    dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
841    dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
842    dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
843    dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
844 }
845 
846 static void
micro_u64add(union tgsi_double_channel * dst,const union tgsi_double_channel * src)847 micro_u64add(union tgsi_double_channel *dst,
848              const union tgsi_double_channel *src)
849 {
850    dst->u64[0] = src[0].u64[0] + src[1].u64[0];
851    dst->u64[1] = src[0].u64[1] + src[1].u64[1];
852    dst->u64[2] = src[0].u64[2] + src[1].u64[2];
853    dst->u64[3] = src[0].u64[3] + src[1].u64[3];
854 }
855 
856 static void
micro_u64mul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)857 micro_u64mul(union tgsi_double_channel *dst,
858              const union tgsi_double_channel *src)
859 {
860    dst->u64[0] = src[0].u64[0] * src[1].u64[0];
861    dst->u64[1] = src[0].u64[1] * src[1].u64[1];
862    dst->u64[2] = src[0].u64[2] * src[1].u64[2];
863    dst->u64[3] = src[0].u64[3] * src[1].u64[3];
864 }
865 
866 static void
micro_u64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)867 micro_u64div(union tgsi_double_channel *dst,
868              const union tgsi_double_channel *src)
869 {
870    dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
871    dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
872    dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
873    dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
874 }
875 
876 static void
micro_i64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)877 micro_i64div(union tgsi_double_channel *dst,
878              const union tgsi_double_channel *src)
879 {
880    dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
881    dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
882    dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
883    dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
884 }
885 
886 static void
micro_u64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)887 micro_u64mod(union tgsi_double_channel *dst,
888              const union tgsi_double_channel *src)
889 {
890    dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
891    dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
892    dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
893    dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
894 }
895 
896 static void
micro_i64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)897 micro_i64mod(union tgsi_double_channel *dst,
898              const union tgsi_double_channel *src)
899 {
900    dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
901    dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
902    dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
903    dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
904 }
905 
906 static void
micro_u64shl(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)907 micro_u64shl(union tgsi_double_channel *dst,
908              const union tgsi_double_channel *src0,
909              union tgsi_exec_channel *src1)
910 {
911    unsigned masked_count;
912    masked_count = src1->u[0] & 0x3f;
913    dst->u64[0] = src0->u64[0] << masked_count;
914    masked_count = src1->u[1] & 0x3f;
915    dst->u64[1] = src0->u64[1] << masked_count;
916    masked_count = src1->u[2] & 0x3f;
917    dst->u64[2] = src0->u64[2] << masked_count;
918    masked_count = src1->u[3] & 0x3f;
919    dst->u64[3] = src0->u64[3] << masked_count;
920 }
921 
922 static void
micro_i64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)923 micro_i64shr(union tgsi_double_channel *dst,
924              const union tgsi_double_channel *src0,
925              union tgsi_exec_channel *src1)
926 {
927    unsigned masked_count;
928    masked_count = src1->u[0] & 0x3f;
929    dst->i64[0] = src0->i64[0] >> masked_count;
930    masked_count = src1->u[1] & 0x3f;
931    dst->i64[1] = src0->i64[1] >> masked_count;
932    masked_count = src1->u[2] & 0x3f;
933    dst->i64[2] = src0->i64[2] >> masked_count;
934    masked_count = src1->u[3] & 0x3f;
935    dst->i64[3] = src0->i64[3] >> masked_count;
936 }
937 
938 static void
micro_u64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)939 micro_u64shr(union tgsi_double_channel *dst,
940              const union tgsi_double_channel *src0,
941              union tgsi_exec_channel *src1)
942 {
943    unsigned masked_count;
944    masked_count = src1->u[0] & 0x3f;
945    dst->u64[0] = src0->u64[0] >> masked_count;
946    masked_count = src1->u[1] & 0x3f;
947    dst->u64[1] = src0->u64[1] >> masked_count;
948    masked_count = src1->u[2] & 0x3f;
949    dst->u64[2] = src0->u64[2] >> masked_count;
950    masked_count = src1->u[3] & 0x3f;
951    dst->u64[3] = src0->u64[3] >> masked_count;
952 }
953 
/*
 * Tags for the data types the executor distinguishes.
 *
 * NOTE(review): presumably used to select how the bits of a source or
 * destination channel are interpreted (float, signed/unsigned 32-bit
 * integer, double, signed/unsigned 64-bit integer) -- confirm against the
 * users of this enum.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT,
   TGSI_EXEC_DATA_INT,
   TGSI_EXEC_DATA_UINT,
   TGSI_EXEC_DATA_DOUBLE,
   TGSI_EXEC_DATA_INT64,
   TGSI_EXEC_DATA_UINT64,
};
962 
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 * Each name is a file-local alias for the TGSI_EXEC_TEMP_* constant with the
 * same suffix.
 */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
#define TEMP_PRIMITIVE_S1_I   TGSI_EXEC_TEMP_PRIMITIVE_S1_I
#define TEMP_PRIMITIVE_S1_C   TGSI_EXEC_TEMP_PRIMITIVE_S1_C
#define TEMP_PRIMITIVE_S2_I   TGSI_EXEC_TEMP_PRIMITIVE_S2_I
#define TEMP_PRIMITIVE_S2_C   TGSI_EXEC_TEMP_PRIMITIVE_S2_C
#define TEMP_PRIMITIVE_S3_I   TGSI_EXEC_TEMP_PRIMITIVE_S3_I
#define TEMP_PRIMITIVE_S3_C   TGSI_EXEC_TEMP_PRIMITIVE_S3_C

/*
 * Lookup table pairing the register index (idx) and channel (chan) of the
 * four TEMP_PRIMITIVE* locations, so they can be addressed by a small
 * integer instead of by name.
 *
 * NOTE(review): entries 1..3 (the _S1/_S2/_S3 variants) presumably belong
 * to additional output streams -- confirm against the code that indexes
 * this table.
 */
static const struct {
   int idx;
   int chan;
} temp_prim_idxs[] = {
   { TEMP_PRIMITIVE_I, TEMP_PRIMITIVE_C },
   { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C },
   { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C },
   { TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C },
};
988 
/**
 * Recompute MACH->ExecMask as the bitwise AND of the conditional, loop,
 * continue, switch and function-call masks.
 *
 * The argument and the whole expansion are parenthesised so the macro
 * behaves like a single expression for any pointer-valued argument
 * (e.g. "&machine") and in any statement context.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->Switch.mask & \
                          (MACH)->FuncMask)
992 
993 
994 static const union tgsi_exec_channel ZeroVec =
995    { { 0.0, 0.0, 0.0, 0.0 } };
996 
997 static const union tgsi_exec_channel OneVec = {
998    {1.0f, 1.0f, 1.0f, 1.0f}
999 };
1000 
1001 static const union tgsi_exec_channel P128Vec = {
1002    {128.0f, 128.0f, 128.0f, 128.0f}
1003 };
1004 
1005 static const union tgsi_exec_channel M128Vec = {
1006    {-128.0f, -128.0f, -128.0f, -128.0f}
1007 };
1008 
1009 
1010 /**
1011  * Assert that none of the float values in 'chan' are infinite or NaN.
1012  * NaN and Inf may occur normally during program execution and should
1013  * not lead to crashes, etc.  But when debugging, it's helpful to catch
1014  * them.
1015  */
1016 static inline void
check_inf_or_nan(const union tgsi_exec_channel * chan)1017 check_inf_or_nan(const union tgsi_exec_channel *chan)
1018 {
1019    assert(!util_is_inf_or_nan((chan)->f[0]));
1020    assert(!util_is_inf_or_nan((chan)->f[1]));
1021    assert(!util_is_inf_or_nan((chan)->f[2]));
1022    assert(!util_is_inf_or_nan((chan)->f[3]));
1023 }
1024 
1025 
1026 #ifdef DEBUG
1027 static void
print_chan(const char * msg,const union tgsi_exec_channel * chan)1028 print_chan(const char *msg, const union tgsi_exec_channel *chan)
1029 {
1030    debug_printf("%s = {%f, %f, %f, %f}\n",
1031                 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
1032 }
1033 #endif
1034 
1035 
1036 #ifdef DEBUG
1037 static void
print_temp(const struct tgsi_exec_machine * mach,uint index)1038 print_temp(const struct tgsi_exec_machine *mach, uint index)
1039 {
1040    const struct tgsi_exec_vector *tmp = &mach->Temps[index];
1041    int i;
1042    debug_printf("Temp[%u] =\n", index);
1043    for (i = 0; i < 4; i++) {
1044       debug_printf("  %c: { %f, %f, %f, %f }\n",
1045                    "XYZW"[i],
1046                    tmp->xyzw[i].f[0],
1047                    tmp->xyzw[i].f[1],
1048                    tmp->xyzw[i].f[2],
1049                    tmp->xyzw[i].f[3]);
1050    }
1051 }
1052 #endif
1053 
1054 
1055 void
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine * mach,unsigned num_bufs,const void ** bufs,const unsigned * buf_sizes)1056 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1057                                unsigned num_bufs,
1058                                const void **bufs,
1059                                const unsigned *buf_sizes)
1060 {
1061    unsigned i;
1062 
1063    for (i = 0; i < num_bufs; i++) {
1064       mach->Consts[i] = bufs[i];
1065       mach->ConstsSize[i] = buf_sizes[i];
1066    }
1067 }
1068 
1069 /**
1070  * Initialize machine state by expanding tokens to full instructions,
1071  * allocating temporary storage, setting up constants, etc.
1072  * After this, we can call tgsi_exec_machine_run() many times.
1073  */
1074 void
tgsi_exec_machine_bind_shader(struct tgsi_exec_machine * mach,const struct tgsi_token * tokens,struct tgsi_sampler * sampler,struct tgsi_image * image,struct tgsi_buffer * buffer)1075 tgsi_exec_machine_bind_shader(
1076    struct tgsi_exec_machine *mach,
1077    const struct tgsi_token *tokens,
1078    struct tgsi_sampler *sampler,
1079    struct tgsi_image *image,
1080    struct tgsi_buffer *buffer)
1081 {
1082    uint k;
1083    struct tgsi_parse_context parse;
1084    struct tgsi_full_instruction *instructions;
1085    struct tgsi_full_declaration *declarations;
1086    uint maxInstructions = 10, numInstructions = 0;
1087    uint maxDeclarations = 10, numDeclarations = 0;
1088 
1089 #if 0
1090    tgsi_dump(tokens, 0);
1091 #endif
1092 
1093    util_init_math();
1094 
1095 
1096    mach->Tokens = tokens;
1097    mach->Sampler = sampler;
1098    mach->Image = image;
1099    mach->Buffer = buffer;
1100 
1101    if (!tokens) {
1102       /* unbind and free all */
1103       FREE(mach->Declarations);
1104       mach->Declarations = NULL;
1105       mach->NumDeclarations = 0;
1106 
1107       FREE(mach->Instructions);
1108       mach->Instructions = NULL;
1109       mach->NumInstructions = 0;
1110 
1111       return;
1112    }
1113 
1114    k = tgsi_parse_init (&parse, mach->Tokens);
1115    if (k != TGSI_PARSE_OK) {
1116       debug_printf( "Problem parsing!\n" );
1117       return;
1118    }
1119 
1120    mach->ImmLimit = 0;
1121    mach->NumOutputs = 0;
1122 
1123    for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1124       mach->SysSemanticToIndex[k] = -1;
1125 
1126    if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1127        !mach->UsedGeometryShader) {
1128       struct tgsi_exec_vector *inputs;
1129       struct tgsi_exec_vector *outputs;
1130 
1131       inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1132                             TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1133                             16);
1134 
1135       if (!inputs)
1136          return;
1137 
1138       outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1139                              TGSI_MAX_TOTAL_VERTICES, 16);
1140 
1141       if (!outputs) {
1142          align_free(inputs);
1143          return;
1144       }
1145 
1146       align_free(mach->Inputs);
1147       align_free(mach->Outputs);
1148 
1149       mach->Inputs = inputs;
1150       mach->Outputs = outputs;
1151       mach->UsedGeometryShader = TRUE;
1152    }
1153 
1154    declarations = (struct tgsi_full_declaration *)
1155       MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1156 
1157    if (!declarations) {
1158       return;
1159    }
1160 
1161    instructions = (struct tgsi_full_instruction *)
1162       MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1163 
1164    if (!instructions) {
1165       FREE( declarations );
1166       return;
1167    }
1168 
1169    while( !tgsi_parse_end_of_tokens( &parse ) ) {
1170       uint i;
1171 
1172       tgsi_parse_token( &parse );
1173       switch( parse.FullToken.Token.Type ) {
1174       case TGSI_TOKEN_TYPE_DECLARATION:
1175          /* save expanded declaration */
1176          if (numDeclarations == maxDeclarations) {
1177             declarations = REALLOC(declarations,
1178                                    maxDeclarations
1179                                    * sizeof(struct tgsi_full_declaration),
1180                                    (maxDeclarations + 10)
1181                                    * sizeof(struct tgsi_full_declaration));
1182             maxDeclarations += 10;
1183          }
1184          if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)
1185             mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1);
1186          else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1187             const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1188             mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1189          }
1190 
1191          memcpy(declarations + numDeclarations,
1192                 &parse.FullToken.FullDeclaration,
1193                 sizeof(declarations[0]));
1194          numDeclarations++;
1195          break;
1196 
1197       case TGSI_TOKEN_TYPE_IMMEDIATE:
1198          {
1199             uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1200             assert( size <= 4 );
1201             if (mach->ImmLimit >= mach->ImmsReserved) {
1202                unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128;
1203                float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4));
1204                if (imms) {
1205                   mach->ImmsReserved = newReserved;
1206                   mach->Imms = imms;
1207                } else {
1208                   debug_printf("Unable to (re)allocate space for immidiate constants\n");
1209                   break;
1210                }
1211             }
1212 
1213             for( i = 0; i < size; i++ ) {
1214                mach->Imms[mach->ImmLimit][i] =
1215 		  parse.FullToken.FullImmediate.u[i].Float;
1216             }
1217             mach->ImmLimit += 1;
1218          }
1219          break;
1220 
1221       case TGSI_TOKEN_TYPE_INSTRUCTION:
1222 
1223          /* save expanded instruction */
1224          if (numInstructions == maxInstructions) {
1225             instructions = REALLOC(instructions,
1226                                    maxInstructions
1227                                    * sizeof(struct tgsi_full_instruction),
1228                                    (maxInstructions + 10)
1229                                    * sizeof(struct tgsi_full_instruction));
1230             maxInstructions += 10;
1231          }
1232 
1233          memcpy(instructions + numInstructions,
1234                 &parse.FullToken.FullInstruction,
1235                 sizeof(instructions[0]));
1236 
1237          numInstructions++;
1238          break;
1239 
1240       case TGSI_TOKEN_TYPE_PROPERTY:
1241          if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1242             if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1243                mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1244             }
1245          }
1246          break;
1247 
1248       default:
1249          assert( 0 );
1250       }
1251    }
1252    tgsi_parse_free (&parse);
1253 
1254    FREE(mach->Declarations);
1255    mach->Declarations = declarations;
1256    mach->NumDeclarations = numDeclarations;
1257 
1258    FREE(mach->Instructions);
1259    mach->Instructions = instructions;
1260    mach->NumInstructions = numInstructions;
1261 }
1262 
1263 
1264 struct tgsi_exec_machine *
tgsi_exec_machine_create(enum pipe_shader_type shader_type)1265 tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1266 {
1267    struct tgsi_exec_machine *mach;
1268 
1269    mach = align_malloc( sizeof *mach, 16 );
1270    if (!mach)
1271       goto fail;
1272 
1273    memset(mach, 0, sizeof(*mach));
1274 
1275    mach->ShaderType = shader_type;
1276    mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
1277    mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
1278 
1279    if (shader_type != PIPE_SHADER_COMPUTE) {
1280       mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1281       mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1282       if (!mach->Inputs || !mach->Outputs)
1283          goto fail;
1284    }
1285 
1286    if (shader_type == PIPE_SHADER_FRAGMENT) {
1287       mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16);
1288       if (!mach->InputSampleOffsetApply)
1289          goto fail;
1290    }
1291 
1292 #ifdef DEBUG
1293    /* silence warnings */
1294    (void) print_chan;
1295    (void) print_temp;
1296 #endif
1297 
1298    return mach;
1299 
1300 fail:
1301    if (mach) {
1302       align_free(mach->InputSampleOffsetApply);
1303       align_free(mach->Inputs);
1304       align_free(mach->Outputs);
1305       align_free(mach);
1306    }
1307    return NULL;
1308 }
1309 
1310 
1311 void
tgsi_exec_machine_destroy(struct tgsi_exec_machine * mach)1312 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1313 {
1314    if (mach) {
1315       FREE(mach->Instructions);
1316       FREE(mach->Declarations);
1317       FREE(mach->Imms);
1318 
1319       align_free(mach->InputSampleOffsetApply);
1320       align_free(mach->Inputs);
1321       align_free(mach->Outputs);
1322 
1323       align_free(mach);
1324    }
1325 }
1326 
1327 static void
micro_add(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1328 micro_add(union tgsi_exec_channel *dst,
1329           const union tgsi_exec_channel *src0,
1330           const union tgsi_exec_channel *src1)
1331 {
1332    dst->f[0] = src0->f[0] + src1->f[0];
1333    dst->f[1] = src0->f[1] + src1->f[1];
1334    dst->f[2] = src0->f[2] + src1->f[2];
1335    dst->f[3] = src0->f[3] + src1->f[3];
1336 }
1337 
1338 static void
micro_div(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1339 micro_div(
1340    union tgsi_exec_channel *dst,
1341    const union tgsi_exec_channel *src0,
1342    const union tgsi_exec_channel *src1 )
1343 {
1344    if (src1->f[0] != 0) {
1345       dst->f[0] = src0->f[0] / src1->f[0];
1346    }
1347    if (src1->f[1] != 0) {
1348       dst->f[1] = src0->f[1] / src1->f[1];
1349    }
1350    if (src1->f[2] != 0) {
1351       dst->f[2] = src0->f[2] / src1->f[2];
1352    }
1353    if (src1->f[3] != 0) {
1354       dst->f[3] = src0->f[3] / src1->f[3];
1355    }
1356 }
1357 
1358 static void
micro_lt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)1359 micro_lt(
1360    union tgsi_exec_channel *dst,
1361    const union tgsi_exec_channel *src0,
1362    const union tgsi_exec_channel *src1,
1363    const union tgsi_exec_channel *src2,
1364    const union tgsi_exec_channel *src3 )
1365 {
1366    dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1367    dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1368    dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1369    dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1370 }
1371 
1372 static void
micro_max(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1373 micro_max(union tgsi_exec_channel *dst,
1374           const union tgsi_exec_channel *src0,
1375           const union tgsi_exec_channel *src1)
1376 {
1377    dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
1378    dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
1379    dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
1380    dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
1381 }
1382 
1383 static void
micro_min(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1384 micro_min(union tgsi_exec_channel *dst,
1385           const union tgsi_exec_channel *src0,
1386           const union tgsi_exec_channel *src1)
1387 {
1388    dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
1389    dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
1390    dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
1391    dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
1392 }
1393 
1394 static void
micro_mul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1395 micro_mul(union tgsi_exec_channel *dst,
1396           const union tgsi_exec_channel *src0,
1397           const union tgsi_exec_channel *src1)
1398 {
1399    dst->f[0] = src0->f[0] * src1->f[0];
1400    dst->f[1] = src0->f[1] * src1->f[1];
1401    dst->f[2] = src0->f[2] * src1->f[2];
1402    dst->f[3] = src0->f[3] * src1->f[3];
1403 }
1404 
1405 static void
micro_neg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)1406 micro_neg(
1407    union tgsi_exec_channel *dst,
1408    const union tgsi_exec_channel *src )
1409 {
1410    dst->f[0] = -src->f[0];
1411    dst->f[1] = -src->f[1];
1412    dst->f[2] = -src->f[2];
1413    dst->f[3] = -src->f[3];
1414 }
1415 
1416 static void
micro_pow(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1417 micro_pow(
1418    union tgsi_exec_channel *dst,
1419    const union tgsi_exec_channel *src0,
1420    const union tgsi_exec_channel *src1 )
1421 {
1422 #if FAST_MATH
1423    dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1424    dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1425    dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1426    dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1427 #else
1428    dst->f[0] = powf( src0->f[0], src1->f[0] );
1429    dst->f[1] = powf( src0->f[1], src1->f[1] );
1430    dst->f[2] = powf( src0->f[2], src1->f[2] );
1431    dst->f[3] = powf( src0->f[3], src1->f[3] );
1432 #endif
1433 }
1434 
1435 static void
micro_ldexp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1436 micro_ldexp(union tgsi_exec_channel *dst,
1437             const union tgsi_exec_channel *src0,
1438             const union tgsi_exec_channel *src1)
1439 {
1440    dst->f[0] = ldexpf(src0->f[0], src1->i[0]);
1441    dst->f[1] = ldexpf(src0->f[1], src1->i[1]);
1442    dst->f[2] = ldexpf(src0->f[2], src1->i[2]);
1443    dst->f[3] = ldexpf(src0->f[3], src1->i[3]);
1444 }
1445 
1446 static void
micro_sub(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1447 micro_sub(union tgsi_exec_channel *dst,
1448           const union tgsi_exec_channel *src0,
1449           const union tgsi_exec_channel *src1)
1450 {
1451    dst->f[0] = src0->f[0] - src1->f[0];
1452    dst->f[1] = src0->f[1] - src1->f[1];
1453    dst->f[2] = src0->f[2] - src1->f[2];
1454    dst->f[3] = src0->f[3] - src1->f[3];
1455 }
1456 
1457 static void
fetch_src_file_channel(const struct tgsi_exec_machine * mach,const uint file,const uint swizzle,const union tgsi_exec_channel * index,const union tgsi_exec_channel * index2D,union tgsi_exec_channel * chan)1458 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1459                        const uint file,
1460                        const uint swizzle,
1461                        const union tgsi_exec_channel *index,
1462                        const union tgsi_exec_channel *index2D,
1463                        union tgsi_exec_channel *chan)
1464 {
1465    uint i;
1466 
1467    assert(swizzle < 4);
1468 
1469    switch (file) {
1470    case TGSI_FILE_CONSTANT:
1471       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1472          assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1473          assert(mach->Consts[index2D->i[i]]);
1474 
1475          if (index->i[i] < 0) {
1476             chan->u[i] = 0;
1477          } else {
1478             /* NOTE: copying the const value as a uint instead of float */
1479             const uint constbuf = index2D->i[i];
1480             const uint *buf = (const uint *)mach->Consts[constbuf];
1481             const int pos = index->i[i] * 4 + swizzle;
1482             /* const buffer bounds check */
1483             if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1484                if (0) {
1485                   /* Debug: print warning */
1486                   static int count = 0;
1487                   if (count++ < 100)
1488                      debug_printf("TGSI Exec: const buffer index %d"
1489                                   " out of bounds\n", pos);
1490                }
1491                chan->u[i] = 0;
1492             }
1493             else
1494                chan->u[i] = buf[pos];
1495          }
1496       }
1497       break;
1498 
1499    case TGSI_FILE_INPUT:
1500       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1501          /*
1502          if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1503             debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1504                          index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1505                          index2D->i[i], index->i[i]);
1506                          }*/
1507          int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1508          assert(pos >= 0);
1509          assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1510          chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1511       }
1512       break;
1513 
1514    case TGSI_FILE_SYSTEM_VALUE:
1515       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1516          chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1517       }
1518       break;
1519 
1520    case TGSI_FILE_TEMPORARY:
1521       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1522          assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1523          assert(index2D->i[i] == 0);
1524 
1525          chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1526       }
1527       break;
1528 
1529    case TGSI_FILE_IMMEDIATE:
1530       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1531          assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1532          assert(index2D->i[i] == 0);
1533 
1534          chan->f[i] = mach->Imms[index->i[i]][swizzle];
1535       }
1536       break;
1537 
1538    case TGSI_FILE_ADDRESS:
1539       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1540          assert(index->i[i] >= 0);
1541          assert(index2D->i[i] == 0);
1542 
1543          chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1544       }
1545       break;
1546 
1547    case TGSI_FILE_OUTPUT:
1548       /* vertex/fragment output vars can be read too */
1549       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1550          assert(index->i[i] >= 0);
1551          assert(index2D->i[i] == 0);
1552 
1553          chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1554       }
1555       break;
1556 
1557    default:
1558       assert(0);
1559       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1560          chan->u[i] = 0;
1561       }
1562    }
1563 }
1564 
1565 static void
get_index_registers(const struct tgsi_exec_machine * mach,const struct tgsi_full_src_register * reg,union tgsi_exec_channel * index,union tgsi_exec_channel * index2D)1566 get_index_registers(const struct tgsi_exec_machine *mach,
1567                     const struct tgsi_full_src_register *reg,
1568                     union tgsi_exec_channel *index,
1569                     union tgsi_exec_channel *index2D)
1570 {
1571    uint swizzle;
1572 
1573    /* We start with a direct index into a register file.
1574     *
1575     *    file[1],
1576     *    where:
1577     *       file = Register.File
1578     *       [1] = Register.Index
1579     */
1580    index->i[0] =
1581    index->i[1] =
1582    index->i[2] =
1583    index->i[3] = reg->Register.Index;
1584 
1585    /* There is an extra source register that indirectly subscripts
1586     * a register file. The direct index now becomes an offset
1587     * that is being added to the indirect register.
1588     *
1589     *    file[ind[2].x+1],
1590     *    where:
1591     *       ind = Indirect.File
1592     *       [2] = Indirect.Index
1593     *       .x = Indirect.SwizzleX
1594     */
1595    if (reg->Register.Indirect) {
1596       union tgsi_exec_channel index2;
1597       union tgsi_exec_channel indir_index;
1598       const uint execmask = mach->ExecMask;
1599       uint i;
1600 
1601       /* which address register (always zero now) */
1602       index2.i[0] =
1603       index2.i[1] =
1604       index2.i[2] =
1605       index2.i[3] = reg->Indirect.Index;
1606       /* get current value of address register[swizzle] */
1607       swizzle = reg->Indirect.Swizzle;
1608       fetch_src_file_channel(mach,
1609                              reg->Indirect.File,
1610                              swizzle,
1611                              &index2,
1612                              &ZeroVec,
1613                              &indir_index);
1614 
1615       /* add value of address register to the offset */
1616       index->i[0] += indir_index.i[0];
1617       index->i[1] += indir_index.i[1];
1618       index->i[2] += indir_index.i[2];
1619       index->i[3] += indir_index.i[3];
1620 
1621       /* for disabled execution channels, zero-out the index to
1622        * avoid using a potential garbage value.
1623        */
1624       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1625          if ((execmask & (1 << i)) == 0)
1626             index->i[i] = 0;
1627       }
1628    }
1629 
1630    /* There is an extra source register that is a second
1631     * subscript to a register file. Effectively it means that
1632     * the register file is actually a 2D array of registers.
1633     *
1634     *    file[3][1],
1635     *    where:
1636     *       [3] = Dimension.Index
1637     */
1638    if (reg->Register.Dimension) {
1639       index2D->i[0] =
1640       index2D->i[1] =
1641       index2D->i[2] =
1642       index2D->i[3] = reg->Dimension.Index;
1643 
1644       /* Again, the second subscript index can be addressed indirectly
1645        * identically to the first one.
1646        * Nothing stops us from indirectly addressing the indirect register,
1647        * but there is no need for that, so we won't exercise it.
1648        *
1649        *    file[ind[4].y+3][1],
1650        *    where:
1651        *       ind = DimIndirect.File
1652        *       [4] = DimIndirect.Index
1653        *       .y = DimIndirect.SwizzleX
1654        */
1655       if (reg->Dimension.Indirect) {
1656          union tgsi_exec_channel index2;
1657          union tgsi_exec_channel indir_index;
1658          const uint execmask = mach->ExecMask;
1659          uint i;
1660 
1661          index2.i[0] =
1662          index2.i[1] =
1663          index2.i[2] =
1664          index2.i[3] = reg->DimIndirect.Index;
1665 
1666          swizzle = reg->DimIndirect.Swizzle;
1667          fetch_src_file_channel(mach,
1668                                 reg->DimIndirect.File,
1669                                 swizzle,
1670                                 &index2,
1671                                 &ZeroVec,
1672                                 &indir_index);
1673 
1674          index2D->i[0] += indir_index.i[0];
1675          index2D->i[1] += indir_index.i[1];
1676          index2D->i[2] += indir_index.i[2];
1677          index2D->i[3] += indir_index.i[3];
1678 
1679          /* for disabled execution channels, zero-out the index to
1680           * avoid using a potential garbage value.
1681           */
1682          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1683             if ((execmask & (1 << i)) == 0) {
1684                index2D->i[i] = 0;
1685             }
1686          }
1687       }
1688 
1689       /* If by any chance there was a need for a 3D array of register
1690        * files, we would have to check whether Dimension is followed
1691        * by a dimension register and continue the saga.
1692        */
1693    } else {
1694       index2D->i[0] =
1695       index2D->i[1] =
1696       index2D->i[2] =
1697       index2D->i[3] = 0;
1698    }
1699 }
1700 
1701 
1702 static void
fetch_source_d(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index)1703 fetch_source_d(const struct tgsi_exec_machine *mach,
1704                union tgsi_exec_channel *chan,
1705                const struct tgsi_full_src_register *reg,
1706 	       const uint chan_index)
1707 {
1708    union tgsi_exec_channel index;
1709    union tgsi_exec_channel index2D;
1710    uint swizzle;
1711 
1712    get_index_registers(mach, reg, &index, &index2D);
1713 
1714 
1715    swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1716    fetch_src_file_channel(mach,
1717                           reg->Register.File,
1718                           swizzle,
1719                           &index,
1720                           &index2D,
1721                           chan);
1722 }
1723 
1724 static void
fetch_source(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index,enum tgsi_exec_datatype src_datatype)1725 fetch_source(const struct tgsi_exec_machine *mach,
1726              union tgsi_exec_channel *chan,
1727              const struct tgsi_full_src_register *reg,
1728              const uint chan_index,
1729              enum tgsi_exec_datatype src_datatype)
1730 {
1731    fetch_source_d(mach, chan, reg, chan_index);
1732 
1733    if (reg->Register.Absolute) {
1734       if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1735          micro_abs(chan, chan);
1736       } else {
1737          micro_iabs(chan, chan);
1738       }
1739    }
1740 
1741    if (reg->Register.Negate) {
1742       if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1743          micro_neg(chan, chan);
1744       } else {
1745          micro_ineg(chan, chan);
1746       }
1747    }
1748 }
1749 
1750 static union tgsi_exec_channel *
store_dest_dstret(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,uint chan_index,enum tgsi_exec_datatype dst_datatype)1751 store_dest_dstret(struct tgsi_exec_machine *mach,
1752                  const union tgsi_exec_channel *chan,
1753                  const struct tgsi_full_dst_register *reg,
1754                  uint chan_index,
1755                  enum tgsi_exec_datatype dst_datatype)
1756 {
1757    static union tgsi_exec_channel null;
1758    union tgsi_exec_channel *dst;
1759    union tgsi_exec_channel index2D;
1760    int offset = 0;  /* indirection offset */
1761    int index;
1762 
1763    /* for debugging */
1764    if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1765       check_inf_or_nan(chan);
1766    }
1767 
1768    /* There is an extra source register that indirectly subscripts
1769     * a register file. The direct index now becomes an offset
1770     * that is being added to the indirect register.
1771     *
1772     *    file[ind[2].x+1],
1773     *    where:
1774     *       ind = Indirect.File
1775     *       [2] = Indirect.Index
1776     *       .x = Indirect.SwizzleX
1777     */
1778    if (reg->Register.Indirect) {
1779       union tgsi_exec_channel index;
1780       union tgsi_exec_channel indir_index;
1781       uint swizzle;
1782 
1783       /* which address register (always zero for now) */
1784       index.i[0] =
1785       index.i[1] =
1786       index.i[2] =
1787       index.i[3] = reg->Indirect.Index;
1788 
1789       /* get current value of address register[swizzle] */
1790       swizzle = reg->Indirect.Swizzle;
1791 
1792       /* fetch values from the address/indirection register */
1793       fetch_src_file_channel(mach,
1794                              reg->Indirect.File,
1795                              swizzle,
1796                              &index,
1797                              &ZeroVec,
1798                              &indir_index);
1799 
1800       /* save indirection offset */
1801       offset = indir_index.i[0];
1802    }
1803 
1804    /* There is an extra source register that is a second
1805     * subscript to a register file. Effectively it means that
1806     * the register file is actually a 2D array of registers.
1807     *
1808     *    file[3][1],
1809     *    where:
1810     *       [3] = Dimension.Index
1811     */
1812    if (reg->Register.Dimension) {
1813       index2D.i[0] =
1814       index2D.i[1] =
1815       index2D.i[2] =
1816       index2D.i[3] = reg->Dimension.Index;
1817 
1818       /* Again, the second subscript index can be addressed indirectly
1819        * identically to the first one.
1820        * Nothing stops us from indirectly addressing the indirect register,
1821        * but there is no need for that, so we won't exercise it.
1822        *
1823        *    file[ind[4].y+3][1],
1824        *    where:
1825        *       ind = DimIndirect.File
1826        *       [4] = DimIndirect.Index
1827        *       .y = DimIndirect.SwizzleX
1828        */
1829       if (reg->Dimension.Indirect) {
1830          union tgsi_exec_channel index2;
1831          union tgsi_exec_channel indir_index;
1832          const uint execmask = mach->ExecMask;
1833          unsigned swizzle;
1834          uint i;
1835 
1836          index2.i[0] =
1837          index2.i[1] =
1838          index2.i[2] =
1839          index2.i[3] = reg->DimIndirect.Index;
1840 
1841          swizzle = reg->DimIndirect.Swizzle;
1842          fetch_src_file_channel(mach,
1843                                 reg->DimIndirect.File,
1844                                 swizzle,
1845                                 &index2,
1846                                 &ZeroVec,
1847                                 &indir_index);
1848 
1849          index2D.i[0] += indir_index.i[0];
1850          index2D.i[1] += indir_index.i[1];
1851          index2D.i[2] += indir_index.i[2];
1852          index2D.i[3] += indir_index.i[3];
1853 
1854          /* for disabled execution channels, zero-out the index to
1855           * avoid using a potential garbage value.
1856           */
1857          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1858             if ((execmask & (1 << i)) == 0) {
1859                index2D.i[i] = 0;
1860             }
1861          }
1862       }
1863 
1864       /* If by any chance there was a need for a 3D array of register
1865        * files, we would have to check whether Dimension is followed
1866        * by a dimension register and continue the saga.
1867        */
1868    } else {
1869       index2D.i[0] =
1870       index2D.i[1] =
1871       index2D.i[2] =
1872       index2D.i[3] = 0;
1873    }
1874 
1875    switch (reg->Register.File) {
1876    case TGSI_FILE_NULL:
1877       dst = &null;
1878       break;
1879 
1880    case TGSI_FILE_OUTPUT:
1881       index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1882          + reg->Register.Index;
1883       dst = &mach->Outputs[offset + index].xyzw[chan_index];
1884 #if 0
1885       debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1886                    mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1887                    reg->Register.Index);
1888       if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1889          debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1890          for (i = 0; i < TGSI_QUAD_SIZE; i++)
1891             if (execmask & (1 << i))
1892                debug_printf("%f, ", chan->f[i]);
1893          debug_printf(")\n");
1894       }
1895 #endif
1896       break;
1897 
1898    case TGSI_FILE_TEMPORARY:
1899       index = reg->Register.Index;
1900       assert( index < TGSI_EXEC_NUM_TEMPS );
1901       dst = &mach->Temps[offset + index].xyzw[chan_index];
1902       break;
1903 
1904    case TGSI_FILE_ADDRESS:
1905       index = reg->Register.Index;
1906       dst = &mach->Addrs[index].xyzw[chan_index];
1907       break;
1908 
1909    default:
1910       assert( 0 );
1911       return NULL;
1912    }
1913 
1914    return dst;
1915 }
1916 
1917 static void
store_dest_double(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,uint chan_index,enum tgsi_exec_datatype dst_datatype)1918 store_dest_double(struct tgsi_exec_machine *mach,
1919                  const union tgsi_exec_channel *chan,
1920                  const struct tgsi_full_dst_register *reg,
1921                  uint chan_index,
1922                  enum tgsi_exec_datatype dst_datatype)
1923 {
1924    union tgsi_exec_channel *dst;
1925    const uint execmask = mach->ExecMask;
1926    int i;
1927 
1928    dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
1929    if (!dst)
1930       return;
1931 
1932    /* doubles path */
1933    for (i = 0; i < TGSI_QUAD_SIZE; i++)
1934       if (execmask & (1 << i))
1935          dst->i[i] = chan->i[i];
1936 }
1937 
1938 static void
store_dest(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_index,enum tgsi_exec_datatype dst_datatype)1939 store_dest(struct tgsi_exec_machine *mach,
1940            const union tgsi_exec_channel *chan,
1941            const struct tgsi_full_dst_register *reg,
1942            const struct tgsi_full_instruction *inst,
1943            uint chan_index,
1944            enum tgsi_exec_datatype dst_datatype)
1945 {
1946    union tgsi_exec_channel *dst;
1947    const uint execmask = mach->ExecMask;
1948    int i;
1949 
1950    dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
1951    if (!dst)
1952       return;
1953 
1954    if (!inst->Instruction.Saturate) {
1955       for (i = 0; i < TGSI_QUAD_SIZE; i++)
1956          if (execmask & (1 << i))
1957             dst->i[i] = chan->i[i];
1958    }
1959    else {
1960       for (i = 0; i < TGSI_QUAD_SIZE; i++)
1961          if (execmask & (1 << i)) {
1962             if (chan->f[i] < 0.0f)
1963                dst->f[i] = 0.0f;
1964             else if (chan->f[i] > 1.0f)
1965                dst->f[i] = 1.0f;
1966             else
1967                dst->i[i] = chan->i[i];
1968          }
1969    }
1970 }
1971 
/*
 * Convenience wrappers around fetch_source().
 *
 * Both macros expand to a call that uses the identifiers `mach` and `inst`,
 * so they can only be used where variables with those names are in scope.
 * VAL is the channel to fill, INDEX selects inst->Src[INDEX] and CHAN is the
 * channel index passed on to fetch_source().
 */

/* Fetch a source channel with float modifier handling. */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

/* Fetch a source channel with integer modifier handling. */
#define IFETCH(VAL,INDEX,CHAN)\
    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
1977 
1978 
1979 /**
1980  * Execute ARB-style KIL which is predicated by a src register.
1981  * Kill fragment if any of the four values is less than zero.
1982  */
1983 static void
exec_kill_if(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)1984 exec_kill_if(struct tgsi_exec_machine *mach,
1985              const struct tgsi_full_instruction *inst)
1986 {
1987    uint uniquemask;
1988    uint chan_index;
1989    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1990    union tgsi_exec_channel r[1];
1991 
1992    /* This mask stores component bits that were already tested. */
1993    uniquemask = 0;
1994 
1995    for (chan_index = 0; chan_index < 4; chan_index++)
1996    {
1997       uint swizzle;
1998       uint i;
1999 
2000       /* unswizzle channel */
2001       swizzle = tgsi_util_get_full_src_register_swizzle (
2002                         &inst->Src[0],
2003                         chan_index);
2004 
2005       /* check if the component has not been already tested */
2006       if (uniquemask & (1 << swizzle))
2007          continue;
2008       uniquemask |= 1 << swizzle;
2009 
2010       FETCH(&r[0], 0, chan_index);
2011       for (i = 0; i < 4; i++)
2012          if (r[0].f[i] < 0.0f)
2013             kilmask |= 1 << i;
2014    }
2015 
2016    /* restrict to fragments currently executing */
2017    kilmask &= mach->ExecMask;
2018 
2019    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2020 }
2021 
2022 /**
2023  * Unconditional fragment kill/discard.
2024  */
2025 static void
exec_kill(struct tgsi_exec_machine * mach)2026 exec_kill(struct tgsi_exec_machine *mach)
2027 {
2028    uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2029 
2030    /* kill fragment for all fragments currently executing */
2031    kilmask = mach->ExecMask;
2032    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2033 }
2034 
2035 static void
emit_vertex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2036 emit_vertex(struct tgsi_exec_machine *mach,
2037             const struct tgsi_full_instruction *inst)
2038 {
2039    union tgsi_exec_channel r[1];
2040    unsigned stream_id;
2041    unsigned *prim_count;
2042    /* FIXME: check for exec mask correctly
2043    unsigned i;
2044    for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2045          if ((mach->ExecMask & (1 << i)))
2046    */
2047    IFETCH(&r[0], 0, TGSI_CHAN_X);
2048    stream_id = r[0].u[0];
2049    prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
2050    if (mach->ExecMask) {
2051       if (mach->Primitives[stream_id][*prim_count] >= mach->MaxOutputVertices)
2052          return;
2053 
2054       if (mach->Primitives[stream_id][*prim_count] == 0)
2055          mach->PrimitiveOffsets[stream_id][*prim_count] = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0];
2056       mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
2057       mach->Primitives[stream_id][*prim_count]++;
2058    }
2059 }
2060 
2061 static void
emit_primitive(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2062 emit_primitive(struct tgsi_exec_machine *mach,
2063                const struct tgsi_full_instruction *inst)
2064 {
2065    unsigned *prim_count;
2066    union tgsi_exec_channel r[1];
2067    unsigned stream_id = 0;
2068    /* FIXME: check for exec mask correctly
2069    unsigned i;
2070    for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2071          if ((mach->ExecMask & (1 << i)))
2072    */
2073    if (inst) {
2074       IFETCH(&r[0], 0, TGSI_CHAN_X);
2075       stream_id = r[0].u[0];
2076    }
2077    prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0];
2078    if (mach->ExecMask) {
2079       ++(*prim_count);
2080       debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
2081       mach->Primitives[stream_id][*prim_count] = 0;
2082    }
2083 }
2084 
2085 static void
conditional_emit_primitive(struct tgsi_exec_machine * mach)2086 conditional_emit_primitive(struct tgsi_exec_machine *mach)
2087 {
2088    if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
2089       int emitted_verts =
2090          mach->Primitives[0][mach->Temps[temp_prim_idxs[0].idx].xyzw[temp_prim_idxs[0].chan].u[0]];
2091       if (emitted_verts) {
2092          emit_primitive(mach, NULL);
2093       }
2094    }
2095 }
2096 
2097 
2098 /*
2099  * Fetch four texture samples using STR texture coordinates.
2100  */
2101 static void
fetch_texel(struct tgsi_sampler * sampler,const unsigned sview_idx,const unsigned sampler_idx,const union tgsi_exec_channel * s,const union tgsi_exec_channel * t,const union tgsi_exec_channel * p,const union tgsi_exec_channel * c0,const union tgsi_exec_channel * c1,float derivs[3][2][TGSI_QUAD_SIZE],const int8_t offset[3],enum tgsi_sampler_control control,union tgsi_exec_channel * r,union tgsi_exec_channel * g,union tgsi_exec_channel * b,union tgsi_exec_channel * a)2102 fetch_texel( struct tgsi_sampler *sampler,
2103              const unsigned sview_idx,
2104              const unsigned sampler_idx,
2105              const union tgsi_exec_channel *s,
2106              const union tgsi_exec_channel *t,
2107              const union tgsi_exec_channel *p,
2108              const union tgsi_exec_channel *c0,
2109              const union tgsi_exec_channel *c1,
2110              float derivs[3][2][TGSI_QUAD_SIZE],
2111              const int8_t offset[3],
2112              enum tgsi_sampler_control control,
2113              union tgsi_exec_channel *r,
2114              union tgsi_exec_channel *g,
2115              union tgsi_exec_channel *b,
2116              union tgsi_exec_channel *a )
2117 {
2118    uint j;
2119    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2120 
2121    /* FIXME: handle explicit derivs, offsets */
2122    sampler->get_samples(sampler, sview_idx, sampler_idx,
2123                         s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
2124 
2125    for (j = 0; j < 4; j++) {
2126       r->f[j] = rgba[0][j];
2127       g->f[j] = rgba[1][j];
2128       b->f[j] = rgba[2][j];
2129       a->f[j] = rgba[3][j];
2130    }
2131 }
2132 
2133 
/*
 * Values for the `modifier` argument of exec_tex().
 *
 * As used by exec_tex(): NONE fetches no extra operand; PROJECTED divides
 * the coordinates (and shadow reference) by the fetched modifier value;
 * LOD_BIAS, EXPLICIT_LOD and GATHER pass the fetched value on as the last
 * sampler argument and select the matching tgsi_sampler_control;
 * LEVEL_ZERO is asserted never to reach exec_tex().
 */
#define TEX_MODIFIER_NONE           0
#define TEX_MODIFIER_PROJECTED      1
#define TEX_MODIFIER_LOD_BIAS       2
#define TEX_MODIFIER_EXPLICIT_LOD   3
#define TEX_MODIFIER_LEVEL_ZERO     4
#define TEX_MODIFIER_GATHER         5
2140 
2141 /*
2142  * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2143  */
2144 static void
fetch_texel_offsets(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int8_t offsets[3])2145 fetch_texel_offsets(struct tgsi_exec_machine *mach,
2146                     const struct tgsi_full_instruction *inst,
2147                     int8_t offsets[3])
2148 {
2149    if (inst->Texture.NumOffsets == 1) {
2150       union tgsi_exec_channel index;
2151       union tgsi_exec_channel offset[3];
2152       index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2153       fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2154                              inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2155       fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2156                              inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2157       fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2158                              inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2159      offsets[0] = offset[0].i[0];
2160      offsets[1] = offset[1].i[0];
2161      offsets[2] = offset[2].i[0];
2162    } else {
2163      assert(inst->Texture.NumOffsets == 0);
2164      offsets[0] = offsets[1] = offsets[2] = 0;
2165    }
2166 }
2167 
2168 
2169 /*
2170  * Fetch dx and dy values for one channel (s, t or r).
2171  * Put dx values into one float array, dy values into another.
2172  */
2173 static void
fetch_assign_deriv_channel(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,unsigned regdsrcx,unsigned chan,float derivs[2][TGSI_QUAD_SIZE])2174 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2175                            const struct tgsi_full_instruction *inst,
2176                            unsigned regdsrcx,
2177                            unsigned chan,
2178                            float derivs[2][TGSI_QUAD_SIZE])
2179 {
2180    union tgsi_exec_channel d;
2181    FETCH(&d, regdsrcx, chan);
2182    derivs[0][0] = d.f[0];
2183    derivs[0][1] = d.f[1];
2184    derivs[0][2] = d.f[2];
2185    derivs[0][3] = d.f[3];
2186    FETCH(&d, regdsrcx + 1, chan);
2187    derivs[1][0] = d.f[0];
2188    derivs[1][1] = d.f[1];
2189    derivs[1][2] = d.f[2];
2190    derivs[1][3] = d.f[3];
2191 }
2192 
2193 static uint
fetch_sampler_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint sampler)2194 fetch_sampler_unit(struct tgsi_exec_machine *mach,
2195                    const struct tgsi_full_instruction *inst,
2196                    uint sampler)
2197 {
2198    uint unit = 0;
2199    int i;
2200    if (inst->Src[sampler].Register.Indirect) {
2201       const struct tgsi_full_src_register *reg = &inst->Src[sampler];
2202       union tgsi_exec_channel indir_index, index2;
2203       const uint execmask = mach->ExecMask;
2204       index2.i[0] =
2205       index2.i[1] =
2206       index2.i[2] =
2207       index2.i[3] = reg->Indirect.Index;
2208 
2209       fetch_src_file_channel(mach,
2210                              reg->Indirect.File,
2211                              reg->Indirect.Swizzle,
2212                              &index2,
2213                              &ZeroVec,
2214                              &indir_index);
2215       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2216          if (execmask & (1 << i)) {
2217             unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2218             break;
2219          }
2220       }
2221 
2222    } else {
2223       unit = inst->Src[sampler].Register.Index;
2224    }
2225    return unit;
2226 }
2227 
2228 /*
2229  * execute a texture instruction.
2230  *
2231  * modifier is used to control the channel routing for the
2232  * instruction variants like proj, lod, and texture with lod bias.
2233  * sampler indicates which src register the sampler is contained in.
2234  */
2235 static void
exec_tex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,uint sampler)2236 exec_tex(struct tgsi_exec_machine *mach,
2237          const struct tgsi_full_instruction *inst,
2238          uint modifier, uint sampler)
2239 {
2240    const union tgsi_exec_channel *args[5], *proj = NULL;
2241    union tgsi_exec_channel r[5];
2242    enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2243    uint chan;
2244    uint unit;
2245    int8_t offsets[3];
2246    int dim, shadow_ref, i;
2247 
2248    unit = fetch_sampler_unit(mach, inst, sampler);
2249    /* always fetch all 3 offsets, overkill but keeps code simple */
2250    fetch_texel_offsets(mach, inst, offsets);
2251 
2252    assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2253    assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2254 
2255    dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2256    shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2257 
2258    assert(dim <= 4);
2259    if (shadow_ref >= 0)
2260       assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args));
2261 
2262    /* fetch modifier to the last argument */
2263    if (modifier != TEX_MODIFIER_NONE) {
2264       const int last = ARRAY_SIZE(args) - 1;
2265 
2266       /* fetch modifier from src0.w or src1.x */
2267       if (sampler == 1) {
2268          assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2269          FETCH(&r[last], 0, TGSI_CHAN_W);
2270       }
2271       else {
2272          FETCH(&r[last], 1, TGSI_CHAN_X);
2273       }
2274 
2275       if (modifier != TEX_MODIFIER_PROJECTED) {
2276          args[last] = &r[last];
2277       }
2278       else {
2279          proj = &r[last];
2280          args[last] = &ZeroVec;
2281       }
2282 
2283       /* point unused arguments to zero vector */
2284       for (i = dim; i < last; i++)
2285          args[i] = &ZeroVec;
2286 
2287       if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2288          control = TGSI_SAMPLER_LOD_EXPLICIT;
2289       else if (modifier == TEX_MODIFIER_LOD_BIAS)
2290          control = TGSI_SAMPLER_LOD_BIAS;
2291       else if (modifier == TEX_MODIFIER_GATHER)
2292          control = TGSI_SAMPLER_GATHER;
2293    }
2294    else {
2295       for (i = dim; i < (int)ARRAY_SIZE(args); i++)
2296          args[i] = &ZeroVec;
2297    }
2298 
2299    /* fetch coordinates */
2300    for (i = 0; i < dim; i++) {
2301       FETCH(&r[i], 0, TGSI_CHAN_X + i);
2302 
2303       if (proj)
2304          micro_div(&r[i], &r[i], proj);
2305 
2306       args[i] = &r[i];
2307    }
2308 
2309    /* fetch reference value */
2310    if (shadow_ref >= 0) {
2311       FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2312 
2313       if (proj)
2314          micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2315 
2316       args[shadow_ref] = &r[shadow_ref];
2317    }
2318 
2319    fetch_texel(mach->Sampler, unit, unit,
2320          args[0], args[1], args[2], args[3], args[4],
2321          NULL, offsets, control,
2322          &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2323 
2324 #if 0
2325    debug_printf("fetch r: %g %g %g %g\n",
2326          r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2327    debug_printf("fetch g: %g %g %g %g\n",
2328          r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2329    debug_printf("fetch b: %g %g %g %g\n",
2330          r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2331    debug_printf("fetch a: %g %g %g %g\n",
2332          r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2333 #endif
2334 
2335    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2336       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2337          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2338       }
2339    }
2340 }
2341 
2342 static void
exec_lodq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2343 exec_lodq(struct tgsi_exec_machine *mach,
2344           const struct tgsi_full_instruction *inst)
2345 {
2346    uint resource_unit, sampler_unit;
2347    unsigned dim;
2348    unsigned i;
2349    union tgsi_exec_channel coords[4];
2350    const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2351    union tgsi_exec_channel r[2];
2352 
2353    resource_unit = fetch_sampler_unit(mach, inst, 1);
2354    if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2355       uint target = mach->SamplerViews[resource_unit].Resource;
2356       dim = tgsi_util_get_texture_coord_dim(target);
2357       sampler_unit = fetch_sampler_unit(mach, inst, 2);
2358    } else {
2359       dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2360       sampler_unit = resource_unit;
2361    }
2362    assert(dim <= ARRAY_SIZE(coords));
2363    /* fetch coordinates */
2364    for (i = 0; i < dim; i++) {
2365       FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2366       args[i] = &coords[i];
2367    }
2368    for (i = dim; i < ARRAY_SIZE(coords); i++) {
2369       args[i] = &ZeroVec;
2370    }
2371    mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
2372                             args[0]->f,
2373                             args[1]->f,
2374                             args[2]->f,
2375                             args[3]->f,
2376                             TGSI_SAMPLER_LOD_NONE,
2377                             r[0].f,
2378                             r[1].f);
2379 
2380    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2381       store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2382                  TGSI_EXEC_DATA_FLOAT);
2383    }
2384    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2385       store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2386                  TGSI_EXEC_DATA_FLOAT);
2387    }
2388    if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2389       unsigned char swizzles[4];
2390       unsigned chan;
2391       swizzles[0] = inst->Src[1].Register.SwizzleX;
2392       swizzles[1] = inst->Src[1].Register.SwizzleY;
2393       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2394       swizzles[3] = inst->Src[1].Register.SwizzleW;
2395 
2396       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2397          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2398             if (swizzles[chan] >= 2) {
2399                store_dest(mach, &ZeroVec,
2400                           &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2401             } else {
2402                store_dest(mach, &r[swizzles[chan]],
2403                           &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2404             }
2405          }
2406       }
2407    } else {
2408       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2409          store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2410                     TGSI_EXEC_DATA_FLOAT);
2411       }
2412       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2413          store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2414                     TGSI_EXEC_DATA_FLOAT);
2415       }
2416    }
2417 }
2418 
2419 static void
exec_txd(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2420 exec_txd(struct tgsi_exec_machine *mach,
2421          const struct tgsi_full_instruction *inst)
2422 {
2423    union tgsi_exec_channel r[4];
2424    float derivs[3][2][TGSI_QUAD_SIZE];
2425    uint chan;
2426    uint unit;
2427    int8_t offsets[3];
2428 
2429    unit = fetch_sampler_unit(mach, inst, 3);
2430    /* always fetch all 3 offsets, overkill but keeps code simple */
2431    fetch_texel_offsets(mach, inst, offsets);
2432 
2433    switch (inst->Texture.Texture) {
2434    case TGSI_TEXTURE_1D:
2435       FETCH(&r[0], 0, TGSI_CHAN_X);
2436 
2437       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2438 
2439       fetch_texel(mach->Sampler, unit, unit,
2440                   &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2441                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2442                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2443       break;
2444 
2445    case TGSI_TEXTURE_SHADOW1D:
2446    case TGSI_TEXTURE_1D_ARRAY:
2447    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2448       /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2449       FETCH(&r[0], 0, TGSI_CHAN_X);
2450       FETCH(&r[1], 0, TGSI_CHAN_Y);
2451       FETCH(&r[2], 0, TGSI_CHAN_Z);
2452 
2453       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2454 
2455       fetch_texel(mach->Sampler, unit, unit,
2456                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2457                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2458                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2459       break;
2460 
2461    case TGSI_TEXTURE_2D:
2462    case TGSI_TEXTURE_RECT:
2463       FETCH(&r[0], 0, TGSI_CHAN_X);
2464       FETCH(&r[1], 0, TGSI_CHAN_Y);
2465 
2466       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2467       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2468 
2469       fetch_texel(mach->Sampler, unit, unit,
2470                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2471                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2472                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2473       break;
2474 
2475 
2476    case TGSI_TEXTURE_SHADOW2D:
2477    case TGSI_TEXTURE_SHADOWRECT:
2478    case TGSI_TEXTURE_2D_ARRAY:
2479    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2480       /* only SHADOW2D_ARRAY actually needs W */
2481       FETCH(&r[0], 0, TGSI_CHAN_X);
2482       FETCH(&r[1], 0, TGSI_CHAN_Y);
2483       FETCH(&r[2], 0, TGSI_CHAN_Z);
2484       FETCH(&r[3], 0, TGSI_CHAN_W);
2485 
2486       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2487       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2488 
2489       fetch_texel(mach->Sampler, unit, unit,
2490                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
2491                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2492                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2493       break;
2494 
2495    case TGSI_TEXTURE_3D:
2496    case TGSI_TEXTURE_CUBE:
2497    case TGSI_TEXTURE_CUBE_ARRAY:
2498    case TGSI_TEXTURE_SHADOWCUBE:
2499       /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2500       FETCH(&r[0], 0, TGSI_CHAN_X);
2501       FETCH(&r[1], 0, TGSI_CHAN_Y);
2502       FETCH(&r[2], 0, TGSI_CHAN_Z);
2503       FETCH(&r[3], 0, TGSI_CHAN_W);
2504 
2505       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2506       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2507       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2508 
2509       fetch_texel(mach->Sampler, unit, unit,
2510                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
2511                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2512                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2513       break;
2514 
2515    default:
2516       assert(0);
2517    }
2518 
2519    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2520       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2521          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2522       }
2523    }
2524 }
2525 
2526 
2527 static void
exec_txf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2528 exec_txf(struct tgsi_exec_machine *mach,
2529          const struct tgsi_full_instruction *inst)
2530 {
2531    union tgsi_exec_channel r[4];
2532    uint chan;
2533    uint unit;
2534    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2535    int j;
2536    int8_t offsets[3];
2537    unsigned target;
2538 
2539    unit = fetch_sampler_unit(mach, inst, 1);
2540    /* always fetch all 3 offsets, overkill but keeps code simple */
2541    fetch_texel_offsets(mach, inst, offsets);
2542 
2543    IFETCH(&r[3], 0, TGSI_CHAN_W);
2544 
2545    if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2546        inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2547       target = mach->SamplerViews[unit].Resource;
2548    }
2549    else {
2550       target = inst->Texture.Texture;
2551    }
2552    switch(target) {
2553    case TGSI_TEXTURE_3D:
2554    case TGSI_TEXTURE_2D_ARRAY:
2555    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2556    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2557       IFETCH(&r[2], 0, TGSI_CHAN_Z);
2558       /* fallthrough */
2559    case TGSI_TEXTURE_2D:
2560    case TGSI_TEXTURE_RECT:
2561    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2562    case TGSI_TEXTURE_SHADOW2D:
2563    case TGSI_TEXTURE_SHADOWRECT:
2564    case TGSI_TEXTURE_1D_ARRAY:
2565    case TGSI_TEXTURE_2D_MSAA:
2566       IFETCH(&r[1], 0, TGSI_CHAN_Y);
2567       /* fallthrough */
2568    case TGSI_TEXTURE_BUFFER:
2569    case TGSI_TEXTURE_1D:
2570    case TGSI_TEXTURE_SHADOW1D:
2571       IFETCH(&r[0], 0, TGSI_CHAN_X);
2572       break;
2573    default:
2574       assert(0);
2575       break;
2576    }
2577 
2578    mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2579                             offsets, rgba);
2580 
2581    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2582       r[0].f[j] = rgba[0][j];
2583       r[1].f[j] = rgba[1][j];
2584       r[2].f[j] = rgba[2][j];
2585       r[3].f[j] = rgba[3][j];
2586    }
2587 
2588    if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2589        inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2590       unsigned char swizzles[4];
2591       swizzles[0] = inst->Src[1].Register.SwizzleX;
2592       swizzles[1] = inst->Src[1].Register.SwizzleY;
2593       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2594       swizzles[3] = inst->Src[1].Register.SwizzleW;
2595 
2596       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2597          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2598             store_dest(mach, &r[swizzles[chan]],
2599                        &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2600          }
2601       }
2602    }
2603    else {
2604       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2605          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2606             store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2607          }
2608       }
2609    }
2610 }
2611 
2612 static void
exec_txq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2613 exec_txq(struct tgsi_exec_machine *mach,
2614          const struct tgsi_full_instruction *inst)
2615 {
2616    int result[4];
2617    union tgsi_exec_channel r[4], src;
2618    uint chan;
2619    uint unit;
2620    int i,j;
2621 
2622    unit = fetch_sampler_unit(mach, inst, 1);
2623 
2624    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2625 
2626    /* XXX: This interface can't return per-pixel values */
2627    mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2628 
2629    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2630       for (j = 0; j < 4; j++) {
2631          r[j].i[i] = result[j];
2632       }
2633    }
2634 
2635    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2636       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2637          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2638                     TGSI_EXEC_DATA_INT);
2639       }
2640    }
2641 }
2642 
2643 static void
exec_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,boolean compare)2644 exec_sample(struct tgsi_exec_machine *mach,
2645             const struct tgsi_full_instruction *inst,
2646             uint modifier, boolean compare)
2647 {
2648    const uint resource_unit = inst->Src[1].Register.Index;
2649    const uint sampler_unit = inst->Src[2].Register.Index;
2650    union tgsi_exec_channel r[5], c1;
2651    const union tgsi_exec_channel *lod = &ZeroVec;
2652    enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2653    uint chan;
2654    unsigned char swizzles[4];
2655    int8_t offsets[3];
2656 
2657    /* always fetch all 3 offsets, overkill but keeps code simple */
2658    fetch_texel_offsets(mach, inst, offsets);
2659 
2660    assert(modifier != TEX_MODIFIER_PROJECTED);
2661 
2662    if (modifier != TEX_MODIFIER_NONE) {
2663       if (modifier == TEX_MODIFIER_LOD_BIAS) {
2664          FETCH(&c1, 3, TGSI_CHAN_X);
2665          lod = &c1;
2666          control = TGSI_SAMPLER_LOD_BIAS;
2667       }
2668       else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2669          FETCH(&c1, 3, TGSI_CHAN_X);
2670          lod = &c1;
2671          control = TGSI_SAMPLER_LOD_EXPLICIT;
2672       }
2673       else if (modifier == TEX_MODIFIER_GATHER) {
2674          control = TGSI_SAMPLER_GATHER;
2675       }
2676       else {
2677          assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2678          control = TGSI_SAMPLER_LOD_ZERO;
2679       }
2680    }
2681 
2682    FETCH(&r[0], 0, TGSI_CHAN_X);
2683 
2684    switch (mach->SamplerViews[resource_unit].Resource) {
2685    case TGSI_TEXTURE_1D:
2686       if (compare) {
2687          FETCH(&r[2], 3, TGSI_CHAN_X);
2688          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2689                      &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2690                      NULL, offsets, control,
2691                      &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2692       }
2693       else {
2694          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2695                      &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2696                      NULL, offsets, control,
2697                      &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2698       }
2699       break;
2700 
2701    case TGSI_TEXTURE_1D_ARRAY:
2702    case TGSI_TEXTURE_2D:
2703    case TGSI_TEXTURE_RECT:
2704       FETCH(&r[1], 0, TGSI_CHAN_Y);
2705       if (compare) {
2706          FETCH(&r[2], 3, TGSI_CHAN_X);
2707          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2708                      &r[0], &r[1], &r[2], &ZeroVec, lod,    /* S, T, P, C, LOD */
2709                      NULL, offsets, control,
2710                      &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2711       }
2712       else {
2713          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2714                      &r[0], &r[1], &ZeroVec, &ZeroVec, lod,    /* S, T, P, C, LOD */
2715                      NULL, offsets, control,
2716                      &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2717       }
2718       break;
2719 
2720    case TGSI_TEXTURE_2D_ARRAY:
2721    case TGSI_TEXTURE_3D:
2722    case TGSI_TEXTURE_CUBE:
2723       FETCH(&r[1], 0, TGSI_CHAN_Y);
2724       FETCH(&r[2], 0, TGSI_CHAN_Z);
2725       if(compare) {
2726          FETCH(&r[3], 3, TGSI_CHAN_X);
2727          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2728                      &r[0], &r[1], &r[2], &r[3], lod,
2729                      NULL, offsets, control,
2730                      &r[0], &r[1], &r[2], &r[3]);
2731       }
2732       else {
2733          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2734                      &r[0], &r[1], &r[2], &ZeroVec, lod,
2735                      NULL, offsets, control,
2736                      &r[0], &r[1], &r[2], &r[3]);
2737       }
2738       break;
2739 
2740    case TGSI_TEXTURE_CUBE_ARRAY:
2741       FETCH(&r[1], 0, TGSI_CHAN_Y);
2742       FETCH(&r[2], 0, TGSI_CHAN_Z);
2743       FETCH(&r[3], 0, TGSI_CHAN_W);
2744       if(compare) {
2745          FETCH(&r[4], 3, TGSI_CHAN_X);
2746          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2747                      &r[0], &r[1], &r[2], &r[3], &r[4],
2748                      NULL, offsets, control,
2749                      &r[0], &r[1], &r[2], &r[3]);
2750       }
2751       else {
2752          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2753                      &r[0], &r[1], &r[2], &r[3], lod,
2754                      NULL, offsets, control,
2755                      &r[0], &r[1], &r[2], &r[3]);
2756       }
2757       break;
2758 
2759 
2760    default:
2761       assert(0);
2762    }
2763 
2764    swizzles[0] = inst->Src[1].Register.SwizzleX;
2765    swizzles[1] = inst->Src[1].Register.SwizzleY;
2766    swizzles[2] = inst->Src[1].Register.SwizzleZ;
2767    swizzles[3] = inst->Src[1].Register.SwizzleW;
2768 
2769    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2770       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2771          store_dest(mach, &r[swizzles[chan]],
2772                     &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2773       }
2774    }
2775 }
2776 
2777 static void
exec_sample_d(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2778 exec_sample_d(struct tgsi_exec_machine *mach,
2779               const struct tgsi_full_instruction *inst)
2780 {
2781    const uint resource_unit = inst->Src[1].Register.Index;
2782    const uint sampler_unit = inst->Src[2].Register.Index;
2783    union tgsi_exec_channel r[4];
2784    float derivs[3][2][TGSI_QUAD_SIZE];
2785    uint chan;
2786    unsigned char swizzles[4];
2787    int8_t offsets[3];
2788 
2789    /* always fetch all 3 offsets, overkill but keeps code simple */
2790    fetch_texel_offsets(mach, inst, offsets);
2791 
2792    FETCH(&r[0], 0, TGSI_CHAN_X);
2793 
2794    switch (mach->SamplerViews[resource_unit].Resource) {
2795    case TGSI_TEXTURE_1D:
2796    case TGSI_TEXTURE_1D_ARRAY:
2797       /* only 1D array actually needs Y */
2798       FETCH(&r[1], 0, TGSI_CHAN_Y);
2799 
2800       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2801 
2802       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2803                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2804                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2805                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2806       break;
2807 
2808    case TGSI_TEXTURE_2D:
2809    case TGSI_TEXTURE_RECT:
2810    case TGSI_TEXTURE_2D_ARRAY:
2811       /* only 2D array actually needs Z */
2812       FETCH(&r[1], 0, TGSI_CHAN_Y);
2813       FETCH(&r[2], 0, TGSI_CHAN_Z);
2814 
2815       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2816       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2817 
2818       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2819                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
2820                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2821                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2822       break;
2823 
2824    case TGSI_TEXTURE_3D:
2825    case TGSI_TEXTURE_CUBE:
2826    case TGSI_TEXTURE_CUBE_ARRAY:
2827       /* only cube array actually needs W */
2828       FETCH(&r[1], 0, TGSI_CHAN_Y);
2829       FETCH(&r[2], 0, TGSI_CHAN_Z);
2830       FETCH(&r[3], 0, TGSI_CHAN_W);
2831 
2832       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2833       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2834       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2835 
2836       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2837                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2838                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2839                   &r[0], &r[1], &r[2], &r[3]);
2840       break;
2841 
2842    default:
2843       assert(0);
2844    }
2845 
2846    swizzles[0] = inst->Src[1].Register.SwizzleX;
2847    swizzles[1] = inst->Src[1].Register.SwizzleY;
2848    swizzles[2] = inst->Src[1].Register.SwizzleZ;
2849    swizzles[3] = inst->Src[1].Register.SwizzleW;
2850 
2851    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2852       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2853          store_dest(mach, &r[swizzles[chan]],
2854                     &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2855       }
2856    }
2857 }
2858 
2859 
2860 /**
2861  * Evaluate a constant-valued coefficient at the position of the
2862  * current quad.
2863  */
2864 static void
eval_constant_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2865 eval_constant_coef(
2866    struct tgsi_exec_machine *mach,
2867    unsigned attrib,
2868    unsigned chan )
2869 {
2870    unsigned i;
2871 
2872    for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2873       mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2874    }
2875 }
2876 
2877 static void
interp_constant_offset(UNUSED const struct tgsi_exec_machine * mach,UNUSED unsigned attrib,UNUSED unsigned chan,UNUSED float ofs_x,UNUSED float ofs_y,UNUSED union tgsi_exec_channel * out_chan)2878 interp_constant_offset(
2879       UNUSED const struct tgsi_exec_machine *mach,
2880       UNUSED unsigned attrib,
2881       UNUSED unsigned chan,
2882       UNUSED float ofs_x,
2883       UNUSED float ofs_y,
2884       UNUSED union tgsi_exec_channel *out_chan)
2885 {
2886 }
2887 
2888 /**
2889  * Evaluate a linear-valued coefficient at the position of the
2890  * current quad.
2891  */
2892 static void
interp_linear_offset(const struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan,float ofs_x,float ofs_y,union tgsi_exec_channel * out_chan)2893 interp_linear_offset(
2894       const struct tgsi_exec_machine *mach,
2895       unsigned attrib,
2896       unsigned chan,
2897       float ofs_x,
2898       float ofs_y,
2899       union tgsi_exec_channel *out_chan)
2900 {
2901    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2902    const float dady = mach->InterpCoefs[attrib].dady[chan];
2903    const float delta = ofs_x * dadx + ofs_y * dady;
2904    out_chan->f[0] += delta;
2905    out_chan->f[1] += delta;
2906    out_chan->f[2] += delta;
2907    out_chan->f[3] += delta;
2908 }
2909 
2910 static void
eval_linear_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2911 eval_linear_coef(struct tgsi_exec_machine *mach,
2912                  unsigned attrib,
2913                  unsigned chan)
2914 {
2915    const float x = mach->QuadPos.xyzw[0].f[0];
2916    const float y = mach->QuadPos.xyzw[1].f[0];
2917    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2918    const float dady = mach->InterpCoefs[attrib].dady[chan];
2919    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2920 
2921    mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2922    mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2923    mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2924    mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2925 }
2926 
2927 /**
2928  * Evaluate a perspective-valued coefficient at the position of the
2929  * current quad.
2930  */
2931 
2932 static void
interp_perspective_offset(const struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan,float ofs_x,float ofs_y,union tgsi_exec_channel * out_chan)2933 interp_perspective_offset(
2934    const struct tgsi_exec_machine *mach,
2935    unsigned attrib,
2936    unsigned chan,
2937    float ofs_x,
2938    float ofs_y,
2939    union tgsi_exec_channel *out_chan)
2940 {
2941    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2942    const float dady = mach->InterpCoefs[attrib].dady[chan];
2943    const float *w = mach->QuadPos.xyzw[3].f;
2944    const float delta = ofs_x * dadx + ofs_y * dady;
2945    out_chan->f[0] += delta / w[0];
2946    out_chan->f[1] += delta / w[1];
2947    out_chan->f[2] += delta / w[2];
2948    out_chan->f[3] += delta / w[3];
2949 }
2950 
2951 static void
eval_perspective_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2952 eval_perspective_coef(
2953    struct tgsi_exec_machine *mach,
2954    unsigned attrib,
2955    unsigned chan )
2956 {
2957    const float x = mach->QuadPos.xyzw[0].f[0];
2958    const float y = mach->QuadPos.xyzw[1].f[0];
2959    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2960    const float dady = mach->InterpCoefs[attrib].dady[chan];
2961    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2962    const float *w = mach->QuadPos.xyzw[3].f;
2963    /* divide by W here */
2964    mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2965    mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2966    mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2967    mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2968 }
2969 
2970 
/** Signature shared by the eval_*_coef() attribute evaluators. */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2975 
2976 static void
exec_declaration(struct tgsi_exec_machine * mach,const struct tgsi_full_declaration * decl)2977 exec_declaration(struct tgsi_exec_machine *mach,
2978                  const struct tgsi_full_declaration *decl)
2979 {
2980    if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2981       mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2982       return;
2983    }
2984 
2985    if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
2986       if (decl->Declaration.File == TGSI_FILE_INPUT) {
2987          uint first, last, mask;
2988 
2989          first = decl->Range.First;
2990          last = decl->Range.Last;
2991          mask = decl->Declaration.UsageMask;
2992 
2993          /* XXX we could remove this special-case code since
2994           * mach->InterpCoefs[first].a0 should already have the
2995           * front/back-face value.  But we should first update the
2996           * ureg code to emit the right UsageMask value (WRITEMASK_X).
2997           * Then, we could remove the tgsi_exec_machine::Face field.
2998           */
2999          /* XXX make FACE a system value */
3000          if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
3001             uint i;
3002 
3003             assert(decl->Semantic.Index == 0);
3004             assert(first == last);
3005 
3006             for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3007                mach->Inputs[first].xyzw[0].f[i] = mach->Face;
3008             }
3009          } else {
3010             eval_coef_func eval;
3011             apply_sample_offset_func interp;
3012             uint i, j;
3013 
3014             switch (decl->Interp.Interpolate) {
3015             case TGSI_INTERPOLATE_CONSTANT:
3016                eval = eval_constant_coef;
3017                interp = interp_constant_offset;
3018                break;
3019 
3020             case TGSI_INTERPOLATE_LINEAR:
3021                eval = eval_linear_coef;
3022                interp = interp_linear_offset;
3023                break;
3024 
3025             case TGSI_INTERPOLATE_PERSPECTIVE:
3026                eval = eval_perspective_coef;
3027                interp = interp_perspective_offset;
3028                break;
3029 
3030             case TGSI_INTERPOLATE_COLOR:
3031                eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
3032                interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;
3033                break;
3034 
3035             default:
3036                assert(0);
3037                return;
3038             }
3039 
3040             for (i = first; i <= last; i++)
3041                mach->InputSampleOffsetApply[i] = interp;
3042 
3043             for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3044                if (mask & (1 << j)) {
3045                   for (i = first; i <= last; i++) {
3046                      eval(mach, i, j);
3047                   }
3048                }
3049             }
3050          }
3051 
3052          if (DEBUG_EXECUTION) {
3053             uint i, j;
3054             for (i = first; i <= last; ++i) {
3055                debug_printf("IN[%2u] = ", i);
3056                for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3057                   if (j > 0) {
3058                      debug_printf("         ");
3059                   }
3060                   debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3061                                mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
3062                                mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
3063                                mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
3064                                mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
3065                }
3066             }
3067          }
3068       }
3069    }
3070 
3071 }
3072 
/** Per-channel operation taking one source operand and writing \p dst. */
typedef void (*micro_unary_op)(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src);
3075 
3076 static void
exec_scalar_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3077 exec_scalar_unary(struct tgsi_exec_machine *mach,
3078                   const struct tgsi_full_instruction *inst,
3079                   micro_unary_op op,
3080                   enum tgsi_exec_datatype dst_datatype,
3081                   enum tgsi_exec_datatype src_datatype)
3082 {
3083    unsigned int chan;
3084    union tgsi_exec_channel src;
3085    union tgsi_exec_channel dst;
3086 
3087    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
3088    op(&dst, &src);
3089    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3090       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3091          store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3092       }
3093    }
3094 }
3095 
3096 static void
exec_vector_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3097 exec_vector_unary(struct tgsi_exec_machine *mach,
3098                   const struct tgsi_full_instruction *inst,
3099                   micro_unary_op op,
3100                   enum tgsi_exec_datatype dst_datatype,
3101                   enum tgsi_exec_datatype src_datatype)
3102 {
3103    unsigned int chan;
3104    struct tgsi_exec_vector dst;
3105 
3106    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3107       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3108          union tgsi_exec_channel src;
3109 
3110          fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
3111          op(&dst.xyzw[chan], &src);
3112       }
3113    }
3114    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3115       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3116          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3117       }
3118    }
3119 }
3120 
/** Per-channel operation taking two source operands and writing \p dst. */
typedef void (*micro_binary_op)(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1);
3124 
3125 static void
exec_scalar_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3126 exec_scalar_binary(struct tgsi_exec_machine *mach,
3127                    const struct tgsi_full_instruction *inst,
3128                    micro_binary_op op,
3129                    enum tgsi_exec_datatype dst_datatype,
3130                    enum tgsi_exec_datatype src_datatype)
3131 {
3132    unsigned int chan;
3133    union tgsi_exec_channel src[2];
3134    union tgsi_exec_channel dst;
3135 
3136    fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
3137    fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
3138    op(&dst, &src[0], &src[1]);
3139    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3140       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3141          store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3142       }
3143    }
3144 }
3145 
3146 static void
exec_vector_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3147 exec_vector_binary(struct tgsi_exec_machine *mach,
3148                    const struct tgsi_full_instruction *inst,
3149                    micro_binary_op op,
3150                    enum tgsi_exec_datatype dst_datatype,
3151                    enum tgsi_exec_datatype src_datatype)
3152 {
3153    unsigned int chan;
3154    struct tgsi_exec_vector dst;
3155 
3156    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3157       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3158          union tgsi_exec_channel src[2];
3159 
3160          fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3161          fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3162          op(&dst.xyzw[chan], &src[0], &src[1]);
3163       }
3164    }
3165    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3166       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3167          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3168       }
3169    }
3170 }
3171 
/** Per-channel operation taking three source operands and writing \p dst. */
typedef void (*micro_trinary_op)(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2);
3176 
3177 static void
exec_vector_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_trinary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3178 exec_vector_trinary(struct tgsi_exec_machine *mach,
3179                     const struct tgsi_full_instruction *inst,
3180                     micro_trinary_op op,
3181                     enum tgsi_exec_datatype dst_datatype,
3182                     enum tgsi_exec_datatype src_datatype)
3183 {
3184    unsigned int chan;
3185    struct tgsi_exec_vector dst;
3186 
3187    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3188       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3189          union tgsi_exec_channel src[3];
3190 
3191          fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3192          fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3193          fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3194          op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3195       }
3196    }
3197    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3198       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3199          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3200       }
3201    }
3202 }
3203 
3204 typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst,
3205                                      const union tgsi_exec_channel *src0,
3206                                      const union tgsi_exec_channel *src1,
3207                                      const union tgsi_exec_channel *src2,
3208                                      const union tgsi_exec_channel *src3);
3209 
3210 static void
exec_vector_quaternary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_quaternary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3211 exec_vector_quaternary(struct tgsi_exec_machine *mach,
3212                        const struct tgsi_full_instruction *inst,
3213                        micro_quaternary_op op,
3214                        enum tgsi_exec_datatype dst_datatype,
3215                        enum tgsi_exec_datatype src_datatype)
3216 {
3217    unsigned int chan;
3218    struct tgsi_exec_vector dst;
3219 
3220    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3221       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3222          union tgsi_exec_channel src[4];
3223 
3224          fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3225          fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3226          fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3227          fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3228          op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3229       }
3230    }
3231    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3232       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3233          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3234       }
3235    }
3236 }
3237 
3238 static void
exec_dp3(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3239 exec_dp3(struct tgsi_exec_machine *mach,
3240          const struct tgsi_full_instruction *inst)
3241 {
3242    unsigned int chan;
3243    union tgsi_exec_channel arg[3];
3244 
3245    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3246    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3247    micro_mul(&arg[2], &arg[0], &arg[1]);
3248 
3249    for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3250       fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3251       fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3252       micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3253    }
3254 
3255    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3256       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3257          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3258       }
3259    }
3260 }
3261 
3262 static void
exec_dp4(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3263 exec_dp4(struct tgsi_exec_machine *mach,
3264          const struct tgsi_full_instruction *inst)
3265 {
3266    unsigned int chan;
3267    union tgsi_exec_channel arg[3];
3268 
3269    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3270    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3271    micro_mul(&arg[2], &arg[0], &arg[1]);
3272 
3273    for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3274       fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3275       fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3276       micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3277    }
3278 
3279    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3280       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3281          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3282       }
3283    }
3284 }
3285 
3286 static void
exec_dp2(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3287 exec_dp2(struct tgsi_exec_machine *mach,
3288          const struct tgsi_full_instruction *inst)
3289 {
3290    unsigned int chan;
3291    union tgsi_exec_channel arg[3];
3292 
3293    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3294    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3295    micro_mul(&arg[2], &arg[0], &arg[1]);
3296 
3297    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3298    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3299    micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3300 
3301    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3302       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3303          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3304       }
3305    }
3306 }
3307 
3308 static void
exec_pk2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3309 exec_pk2h(struct tgsi_exec_machine *mach,
3310           const struct tgsi_full_instruction *inst)
3311 {
3312    unsigned chan;
3313    union tgsi_exec_channel arg[2], dst;
3314 
3315    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3316    fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3317    for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3318       dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
3319          (util_float_to_half(arg[1].f[chan]) << 16);
3320    }
3321    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3322       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3323          store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
3324       }
3325    }
3326 }
3327 
3328 static void
exec_up2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3329 exec_up2h(struct tgsi_exec_machine *mach,
3330           const struct tgsi_full_instruction *inst)
3331 {
3332    unsigned chan;
3333    union tgsi_exec_channel arg, dst[2];
3334 
3335    fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3336    for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3337       dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
3338       dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
3339    }
3340    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3341       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3342          store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3343       }
3344    }
3345 }
3346 
3347 static void
micro_ucmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)3348 micro_ucmp(union tgsi_exec_channel *dst,
3349            const union tgsi_exec_channel *src0,
3350            const union tgsi_exec_channel *src1,
3351            const union tgsi_exec_channel *src2)
3352 {
3353    dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
3354    dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
3355    dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
3356    dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
3357 }
3358 
3359 static void
exec_ucmp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3360 exec_ucmp(struct tgsi_exec_machine *mach,
3361           const struct tgsi_full_instruction *inst)
3362 {
3363    unsigned int chan;
3364    struct tgsi_exec_vector dst;
3365 
3366    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3367       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3368          union tgsi_exec_channel src[3];
3369 
3370          fetch_source(mach, &src[0], &inst->Src[0], chan,
3371                       TGSI_EXEC_DATA_UINT);
3372          fetch_source(mach, &src[1], &inst->Src[1], chan,
3373                       TGSI_EXEC_DATA_FLOAT);
3374          fetch_source(mach, &src[2], &inst->Src[2], chan,
3375                       TGSI_EXEC_DATA_FLOAT);
3376          micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3377       }
3378    }
3379    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3380       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3381          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan,
3382                     TGSI_EXEC_DATA_FLOAT);
3383       }
3384    }
3385 }
3386 
3387 static void
exec_dst(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3388 exec_dst(struct tgsi_exec_machine *mach,
3389          const struct tgsi_full_instruction *inst)
3390 {
3391    union tgsi_exec_channel r[2];
3392    union tgsi_exec_channel d[4];
3393 
3394    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3395       fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3396       fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3397       micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3398    }
3399    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3400       fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3401    }
3402    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3403       fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3404    }
3405 
3406    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3407       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3408    }
3409    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3410       store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3411    }
3412    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3413       store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3414    }
3415    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3416       store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3417    }
3418 }
3419 
3420 static void
exec_log(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3421 exec_log(struct tgsi_exec_machine *mach,
3422          const struct tgsi_full_instruction *inst)
3423 {
3424    union tgsi_exec_channel r[3];
3425 
3426    fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3427    micro_abs(&r[2], &r[0]);  /* r2 = abs(r0) */
3428    micro_lg2(&r[1], &r[2]);  /* r1 = lg2(r2) */
3429    micro_flr(&r[0], &r[1]);  /* r0 = floor(r1) */
3430    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3431       store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3432    }
3433    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3434       micro_exp2(&r[0], &r[0]);       /* r0 = 2 ^ r0 */
3435       micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3436       store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3437    }
3438    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3439       store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3440    }
3441    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3442       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3443    }
3444 }
3445 
3446 static void
exec_exp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3447 exec_exp(struct tgsi_exec_machine *mach,
3448          const struct tgsi_full_instruction *inst)
3449 {
3450    union tgsi_exec_channel r[3];
3451 
3452    fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3453    micro_flr(&r[1], &r[0]);  /* r1 = floor(r0) */
3454    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3455       micro_exp2(&r[2], &r[1]);       /* r2 = 2 ^ r1 */
3456       store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3457    }
3458    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3459       micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3460       store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3461    }
3462    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3463       micro_exp2(&r[2], &r[0]);       /* r2 = 2 ^ r0 */
3464       store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3465    }
3466    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3467       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3468    }
3469 }
3470 
3471 static void
exec_lit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3472 exec_lit(struct tgsi_exec_machine *mach,
3473          const struct tgsi_full_instruction *inst)
3474 {
3475    union tgsi_exec_channel r[3];
3476    union tgsi_exec_channel d[3];
3477 
3478    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3479       fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3480       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3481          fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3482          micro_max(&r[1], &r[1], &ZeroVec);
3483 
3484          fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3485          micro_min(&r[2], &r[2], &P128Vec);
3486          micro_max(&r[2], &r[2], &M128Vec);
3487          micro_pow(&r[1], &r[1], &r[2]);
3488          micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3489          store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3490       }
3491       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3492          micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3493          store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3494       }
3495    }
3496    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3497       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3498    }
3499 
3500    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3501       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3502    }
3503 }
3504 
3505 static void
exec_break(struct tgsi_exec_machine * mach)3506 exec_break(struct tgsi_exec_machine *mach)
3507 {
3508    if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3509       /* turn off loop channels for each enabled exec channel */
3510       mach->LoopMask &= ~mach->ExecMask;
3511       /* Todo: if mach->LoopMask == 0, jump to end of loop */
3512       UPDATE_EXEC_MASK(mach);
3513    } else {
3514       assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3515 
3516       mach->Switch.mask = 0x0;
3517 
3518       UPDATE_EXEC_MASK(mach);
3519    }
3520 }
3521 
3522 static void
exec_switch(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3523 exec_switch(struct tgsi_exec_machine *mach,
3524             const struct tgsi_full_instruction *inst)
3525 {
3526    assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3527    assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3528 
3529    mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3530    fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3531    mach->Switch.mask = 0x0;
3532    mach->Switch.defaultMask = 0x0;
3533 
3534    mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3535    mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3536 
3537    UPDATE_EXEC_MASK(mach);
3538 }
3539 
3540 static void
exec_case(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3541 exec_case(struct tgsi_exec_machine *mach,
3542           const struct tgsi_full_instruction *inst)
3543 {
3544    uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3545    union tgsi_exec_channel src;
3546    uint mask = 0;
3547 
3548    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3549 
3550    if (mach->Switch.selector.u[0] == src.u[0]) {
3551       mask |= 0x1;
3552    }
3553    if (mach->Switch.selector.u[1] == src.u[1]) {
3554       mask |= 0x2;
3555    }
3556    if (mach->Switch.selector.u[2] == src.u[2]) {
3557       mask |= 0x4;
3558    }
3559    if (mach->Switch.selector.u[3] == src.u[3]) {
3560       mask |= 0x8;
3561    }
3562 
3563    mach->Switch.defaultMask |= mask;
3564 
3565    mach->Switch.mask |= mask & prevMask;
3566 
3567    UPDATE_EXEC_MASK(mach);
3568 }
3569 
3570 /* FIXME: this will only work if default is last */
3571 static void
exec_default(struct tgsi_exec_machine * mach)3572 exec_default(struct tgsi_exec_machine *mach)
3573 {
3574    uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3575 
3576    mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3577 
3578    UPDATE_EXEC_MASK(mach);
3579 }
3580 
3581 static void
exec_endswitch(struct tgsi_exec_machine * mach)3582 exec_endswitch(struct tgsi_exec_machine *mach)
3583 {
3584    mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3585    mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3586 
3587    UPDATE_EXEC_MASK(mach);
3588 }
3589 
/**
 * Double-precision kernel: double source(s) to double destination.
 * The binary and trinary executors pass an array of sources through \c src.
 */
typedef void (* micro_dop)(union tgsi_double_channel *dst,
                           const union tgsi_double_channel *src);

/** Kernel taking one double source and one 32-bit source, double result. */
typedef void (* micro_dop_sop)(union tgsi_double_channel *dst,
                               const union tgsi_double_channel *src0,
                               union tgsi_exec_channel *src1);

/** Kernel taking a 32-bit source and producing a double result. */
typedef void (* micro_dop_s)(union tgsi_double_channel *dst,
                             const union tgsi_exec_channel *src);

/** Kernel taking a double source and producing a 32-bit result. */
typedef void (* micro_sop_d)(union tgsi_exec_channel *dst,
                             const union tgsi_double_channel *src);
3602 
3603 static void
fetch_double_channel(struct tgsi_exec_machine * mach,union tgsi_double_channel * chan,const struct tgsi_full_src_register * reg,uint chan_0,uint chan_1)3604 fetch_double_channel(struct tgsi_exec_machine *mach,
3605                      union tgsi_double_channel *chan,
3606                      const struct tgsi_full_src_register *reg,
3607                      uint chan_0,
3608                      uint chan_1)
3609 {
3610    union tgsi_exec_channel src[2];
3611    uint i;
3612 
3613    fetch_source_d(mach, &src[0], reg, chan_0);
3614    fetch_source_d(mach, &src[1], reg, chan_1);
3615 
3616    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3617       chan->u[i][0] = src[0].u[i];
3618       chan->u[i][1] = src[1].u[i];
3619    }
3620    if (reg->Register.Absolute) {
3621       micro_dabs(chan, chan);
3622    }
3623    if (reg->Register.Negate) {
3624       micro_dneg(chan, chan);
3625    }
3626 }
3627 
3628 static void
store_double_channel(struct tgsi_exec_machine * mach,const union tgsi_double_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_0,uint chan_1)3629 store_double_channel(struct tgsi_exec_machine *mach,
3630                      const union tgsi_double_channel *chan,
3631                      const struct tgsi_full_dst_register *reg,
3632                      const struct tgsi_full_instruction *inst,
3633                      uint chan_0,
3634                      uint chan_1)
3635 {
3636    union tgsi_exec_channel dst[2];
3637    uint i;
3638    union tgsi_double_channel temp;
3639    const uint execmask = mach->ExecMask;
3640 
3641    if (!inst->Instruction.Saturate) {
3642       for (i = 0; i < TGSI_QUAD_SIZE; i++)
3643          if (execmask & (1 << i)) {
3644             dst[0].u[i] = chan->u[i][0];
3645             dst[1].u[i] = chan->u[i][1];
3646          }
3647    }
3648    else {
3649       for (i = 0; i < TGSI_QUAD_SIZE; i++)
3650          if (execmask & (1 << i)) {
3651             if (chan->d[i] < 0.0)
3652                temp.d[i] = 0.0;
3653             else if (chan->d[i] > 1.0)
3654                temp.d[i] = 1.0;
3655             else
3656                temp.d[i] = chan->d[i];
3657 
3658             dst[0].u[i] = temp.u[i][0];
3659             dst[1].u[i] = temp.u[i][1];
3660          }
3661    }
3662 
3663    store_dest_double(mach, &dst[0], reg, chan_0, TGSI_EXEC_DATA_UINT);
3664    if (chan_1 != (unsigned)-1)
3665       store_dest_double(mach, &dst[1], reg, chan_1, TGSI_EXEC_DATA_UINT);
3666 }
3667 
3668 static void
exec_double_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3669 exec_double_unary(struct tgsi_exec_machine *mach,
3670                   const struct tgsi_full_instruction *inst,
3671                   micro_dop op)
3672 {
3673    union tgsi_double_channel src;
3674    union tgsi_double_channel dst;
3675 
3676    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3677       fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3678       op(&dst, &src);
3679       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3680    }
3681    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3682       fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3683       op(&dst, &src);
3684       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3685    }
3686 }
3687 
3688 static void
exec_double_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op,enum tgsi_exec_datatype dst_datatype)3689 exec_double_binary(struct tgsi_exec_machine *mach,
3690                    const struct tgsi_full_instruction *inst,
3691                    micro_dop op,
3692                    enum tgsi_exec_datatype dst_datatype)
3693 {
3694    union tgsi_double_channel src[2];
3695    union tgsi_double_channel dst;
3696    int first_dest_chan, second_dest_chan;
3697    int wmask;
3698 
3699    wmask = inst->Dst[0].Register.WriteMask;
3700    /* these are & because of the way DSLT etc store their destinations */
3701    if (wmask & TGSI_WRITEMASK_XY) {
3702       first_dest_chan = TGSI_CHAN_X;
3703       second_dest_chan = TGSI_CHAN_Y;
3704       if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3705          first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
3706          second_dest_chan = -1;
3707       }
3708 
3709       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3710       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3711       op(&dst, src);
3712       store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3713    }
3714 
3715    if (wmask & TGSI_WRITEMASK_ZW) {
3716       first_dest_chan = TGSI_CHAN_Z;
3717       second_dest_chan = TGSI_CHAN_W;
3718       if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3719          first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
3720          second_dest_chan = -1;
3721       }
3722 
3723       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3724       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3725       op(&dst, src);
3726       store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3727    }
3728 }
3729 
3730 static void
exec_double_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3731 exec_double_trinary(struct tgsi_exec_machine *mach,
3732                     const struct tgsi_full_instruction *inst,
3733                     micro_dop op)
3734 {
3735    union tgsi_double_channel src[3];
3736    union tgsi_double_channel dst;
3737 
3738    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3739       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3740       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3741       fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
3742       op(&dst, src);
3743       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3744    }
3745    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3746       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3747       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3748       fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
3749       op(&dst, src);
3750       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3751    }
3752 }
3753 
3754 static void
exec_dldexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3755 exec_dldexp(struct tgsi_exec_machine *mach,
3756             const struct tgsi_full_instruction *inst)
3757 {
3758    union tgsi_double_channel src0;
3759    union tgsi_exec_channel src1;
3760    union tgsi_double_channel dst;
3761    int wmask;
3762 
3763    wmask = inst->Dst[0].Register.WriteMask;
3764    if (wmask & TGSI_WRITEMASK_XY) {
3765       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3766       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3767       micro_dldexp(&dst, &src0, &src1);
3768       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3769    }
3770 
3771    if (wmask & TGSI_WRITEMASK_ZW) {
3772       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3773       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3774       micro_dldexp(&dst, &src0, &src1);
3775       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3776    }
3777 }
3778 
3779 static void
exec_dfracexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3780 exec_dfracexp(struct tgsi_exec_machine *mach,
3781               const struct tgsi_full_instruction *inst)
3782 {
3783    union tgsi_double_channel src;
3784    union tgsi_double_channel dst;
3785    union tgsi_exec_channel dst_exp;
3786 
3787    fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3788    micro_dfracexp(&dst, &dst_exp, &src);
3789    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)
3790       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3791    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)
3792       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3793    for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3794       if (inst->Dst[1].Register.WriteMask & (1 << chan))
3795          store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT);
3796    }
3797 }
3798 
3799 static void
exec_arg0_64_arg1_32(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_sop op)3800 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
3801             const struct tgsi_full_instruction *inst,
3802             micro_dop_sop op)
3803 {
3804    union tgsi_double_channel src0;
3805    union tgsi_exec_channel src1;
3806    union tgsi_double_channel dst;
3807    int wmask;
3808 
3809    wmask = inst->Dst[0].Register.WriteMask;
3810    if (wmask & TGSI_WRITEMASK_XY) {
3811       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3812       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3813       op(&dst, &src0, &src1);
3814       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3815    }
3816 
3817    if (wmask & TGSI_WRITEMASK_ZW) {
3818       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3819       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3820       op(&dst, &src0, &src1);
3821       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3822    }
3823 }
3824 
3825 static int
get_image_coord_dim(unsigned tgsi_tex)3826 get_image_coord_dim(unsigned tgsi_tex)
3827 {
3828    int dim;
3829    switch (tgsi_tex) {
3830    case TGSI_TEXTURE_BUFFER:
3831    case TGSI_TEXTURE_1D:
3832       dim = 1;
3833       break;
3834    case TGSI_TEXTURE_2D:
3835    case TGSI_TEXTURE_RECT:
3836    case TGSI_TEXTURE_1D_ARRAY:
3837    case TGSI_TEXTURE_2D_MSAA:
3838       dim = 2;
3839       break;
3840    case TGSI_TEXTURE_3D:
3841    case TGSI_TEXTURE_CUBE:
3842    case TGSI_TEXTURE_2D_ARRAY:
3843    case TGSI_TEXTURE_2D_ARRAY_MSAA:
3844    case TGSI_TEXTURE_CUBE_ARRAY:
3845       dim = 3;
3846       break;
3847    default:
3848       assert(!"unknown texture target");
3849       dim = 0;
3850       break;
3851    }
3852 
3853    return dim;
3854 }
3855 
3856 static int
get_image_coord_sample(unsigned tgsi_tex)3857 get_image_coord_sample(unsigned tgsi_tex)
3858 {
3859    int sample = 0;
3860    switch (tgsi_tex) {
3861    case TGSI_TEXTURE_2D_MSAA:
3862       sample = 3;
3863       break;
3864    case TGSI_TEXTURE_2D_ARRAY_MSAA:
3865       sample = 4;
3866       break;
3867    default:
3868       break;
3869    }
3870    return sample;
3871 }
3872 
3873 static void
exec_load_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3874 exec_load_img(struct tgsi_exec_machine *mach,
3875               const struct tgsi_full_instruction *inst)
3876 {
3877    union tgsi_exec_channel r[4], sample_r;
3878    uint unit;
3879    int sample;
3880    int i, j;
3881    int dim;
3882    uint chan;
3883    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3884    struct tgsi_image_params params;
3885    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3886 
3887    unit = fetch_sampler_unit(mach, inst, 0);
3888    dim = get_image_coord_dim(inst->Memory.Texture);
3889    sample = get_image_coord_sample(inst->Memory.Texture);
3890    assert(dim <= 3);
3891 
3892    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3893    params.unit = unit;
3894    params.tgsi_tex_instr = inst->Memory.Texture;
3895    params.format = inst->Memory.Format;
3896 
3897    for (i = 0; i < dim; i++) {
3898       IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3899    }
3900 
3901    if (sample)
3902       IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3903 
3904    mach->Image->load(mach->Image, &params,
3905                      r[0].i, r[1].i, r[2].i, sample_r.i,
3906                      rgba);
3907    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3908       r[0].f[j] = rgba[0][j];
3909       r[1].f[j] = rgba[1][j];
3910       r[2].f[j] = rgba[2][j];
3911       r[3].f[j] = rgba[3][j];
3912    }
3913    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3914       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3915          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3916       }
3917    }
3918 }
3919 
3920 static void
exec_load_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3921 exec_load_buf(struct tgsi_exec_machine *mach,
3922               const struct tgsi_full_instruction *inst)
3923 {
3924    union tgsi_exec_channel r[4];
3925    uint unit;
3926    int j;
3927    uint chan;
3928    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3929    struct tgsi_buffer_params params;
3930    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3931 
3932    unit = fetch_sampler_unit(mach, inst, 0);
3933 
3934    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3935    params.unit = unit;
3936    IFETCH(&r[0], 1, TGSI_CHAN_X);
3937 
3938    mach->Buffer->load(mach->Buffer, &params,
3939                       r[0].i, rgba);
3940    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3941       r[0].f[j] = rgba[0][j];
3942       r[1].f[j] = rgba[1][j];
3943       r[2].f[j] = rgba[2][j];
3944       r[3].f[j] = rgba[3][j];
3945    }
3946    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3947       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3948          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3949       }
3950    }
3951 }
3952 
3953 static void
exec_load_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3954 exec_load_mem(struct tgsi_exec_machine *mach,
3955               const struct tgsi_full_instruction *inst)
3956 {
3957    union tgsi_exec_channel r[4];
3958    uint chan;
3959    char *ptr = mach->LocalMem;
3960    uint32_t offset;
3961    int j;
3962 
3963    IFETCH(&r[0], 1, TGSI_CHAN_X);
3964    if (r[0].u[0] >= mach->LocalMemSize)
3965       return;
3966 
3967    offset = r[0].u[0];
3968    ptr += offset;
3969 
3970    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3971       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3972          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3973             memcpy(&r[chan].u[j], ptr + (4 * chan), 4);
3974          }
3975       }
3976    }
3977 
3978    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3979       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3980          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3981       }
3982    }
3983 }
3984 
3985 static void
exec_load(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3986 exec_load(struct tgsi_exec_machine *mach,
3987           const struct tgsi_full_instruction *inst)
3988 {
3989    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
3990       exec_load_img(mach, inst);
3991    else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
3992       exec_load_buf(mach, inst);
3993    else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
3994       exec_load_mem(mach, inst);
3995 }
3996 
3997 static uint
fetch_store_img_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_dst_register * dst)3998 fetch_store_img_unit(struct tgsi_exec_machine *mach,
3999                      const struct tgsi_full_dst_register *dst)
4000 {
4001    uint unit = 0;
4002    int i;
4003    if (dst->Register.Indirect) {
4004       union tgsi_exec_channel indir_index, index2;
4005       const uint execmask = mach->ExecMask;
4006       index2.i[0] =
4007       index2.i[1] =
4008       index2.i[2] =
4009       index2.i[3] = dst->Indirect.Index;
4010 
4011       fetch_src_file_channel(mach,
4012                              dst->Indirect.File,
4013                              dst->Indirect.Swizzle,
4014                              &index2,
4015                              &ZeroVec,
4016                              &indir_index);
4017       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4018          if (execmask & (1 << i)) {
4019             unit = dst->Register.Index + indir_index.i[i];
4020             break;
4021          }
4022       }
4023    } else {
4024       unit = dst->Register.Index;
4025    }
4026    return unit;
4027 }
4028 
4029 static void
exec_store_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4030 exec_store_img(struct tgsi_exec_machine *mach,
4031                const struct tgsi_full_instruction *inst)
4032 {
4033    union tgsi_exec_channel r[3], sample_r;
4034    union tgsi_exec_channel value[4];
4035    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4036    struct tgsi_image_params params;
4037    int dim;
4038    int sample;
4039    int i, j;
4040    uint unit;
4041    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4042    unit = fetch_store_img_unit(mach, &inst->Dst[0]);
4043    dim = get_image_coord_dim(inst->Memory.Texture);
4044    sample = get_image_coord_sample(inst->Memory.Texture);
4045    assert(dim <= 3);
4046 
4047    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4048    params.unit = unit;
4049    params.tgsi_tex_instr = inst->Memory.Texture;
4050    params.format = inst->Memory.Format;
4051 
4052    for (i = 0; i < dim; i++) {
4053       IFETCH(&r[i], 0, TGSI_CHAN_X + i);
4054    }
4055 
4056    for (i = 0; i < 4; i++) {
4057       FETCH(&value[i], 1, TGSI_CHAN_X + i);
4058    }
4059    if (sample)
4060       IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
4061 
4062    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4063       rgba[0][j] = value[0].f[j];
4064       rgba[1][j] = value[1].f[j];
4065       rgba[2][j] = value[2].f[j];
4066       rgba[3][j] = value[3].f[j];
4067    }
4068 
4069    mach->Image->store(mach->Image, &params,
4070                       r[0].i, r[1].i, r[2].i, sample_r.i,
4071                       rgba);
4072 }
4073 
4074 static void
exec_store_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4075 exec_store_buf(struct tgsi_exec_machine *mach,
4076                const struct tgsi_full_instruction *inst)
4077 {
4078    union tgsi_exec_channel r[3];
4079    union tgsi_exec_channel value[4];
4080    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4081    struct tgsi_buffer_params params;
4082    int i, j;
4083    uint unit;
4084    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4085 
4086    unit = fetch_store_img_unit(mach, &inst->Dst[0]);
4087 
4088    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4089    params.unit = unit;
4090    params.writemask = inst->Dst[0].Register.WriteMask;
4091 
4092    IFETCH(&r[0], 0, TGSI_CHAN_X);
4093    for (i = 0; i < 4; i++) {
4094       FETCH(&value[i], 1, TGSI_CHAN_X + i);
4095    }
4096 
4097    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4098       rgba[0][j] = value[0].f[j];
4099       rgba[1][j] = value[1].f[j];
4100       rgba[2][j] = value[2].f[j];
4101       rgba[3][j] = value[3].f[j];
4102    }
4103 
4104    mach->Buffer->store(mach->Buffer, &params,
4105                       r[0].i,
4106                       rgba);
4107 }
4108 
4109 static void
exec_store_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4110 exec_store_mem(struct tgsi_exec_machine *mach,
4111                const struct tgsi_full_instruction *inst)
4112 {
4113    union tgsi_exec_channel r[3];
4114    union tgsi_exec_channel value[4];
4115    uint i, chan;
4116    char *ptr = mach->LocalMem;
4117    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4118    int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4119 
4120    IFETCH(&r[0], 0, TGSI_CHAN_X);
4121 
4122    for (i = 0; i < 4; i++) {
4123       FETCH(&value[i], 1, TGSI_CHAN_X + i);
4124    }
4125 
4126    if (r[0].u[0] >= mach->LocalMemSize)
4127       return;
4128    ptr += r[0].u[0];
4129 
4130    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4131       if (execmask & (1 << i)) {
4132          for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4133             if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4134                memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
4135             }
4136          }
4137       }
4138    }
4139 }
4140 
4141 static void
exec_store(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4142 exec_store(struct tgsi_exec_machine *mach,
4143            const struct tgsi_full_instruction *inst)
4144 {
4145    if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
4146       exec_store_img(mach, inst);
4147    else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
4148       exec_store_buf(mach, inst);
4149    else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
4150       exec_store_mem(mach, inst);
4151 }
4152 
4153 static void
exec_atomop_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4154 exec_atomop_img(struct tgsi_exec_machine *mach,
4155                 const struct tgsi_full_instruction *inst)
4156 {
4157    union tgsi_exec_channel r[4], sample_r;
4158    union tgsi_exec_channel value[4], value2[4];
4159    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4160    float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4161    struct tgsi_image_params params;
4162    int dim;
4163    int sample;
4164    int i, j;
4165    uint unit, chan;
4166    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4167    unit = fetch_sampler_unit(mach, inst, 0);
4168    dim = get_image_coord_dim(inst->Memory.Texture);
4169    sample = get_image_coord_sample(inst->Memory.Texture);
4170    assert(dim <= 3);
4171 
4172    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4173    params.unit = unit;
4174    params.tgsi_tex_instr = inst->Memory.Texture;
4175    params.format = inst->Memory.Format;
4176 
4177    for (i = 0; i < dim; i++) {
4178       IFETCH(&r[i], 1, TGSI_CHAN_X + i);
4179    }
4180 
4181    for (i = 0; i < 4; i++) {
4182       FETCH(&value[i], 2, TGSI_CHAN_X + i);
4183       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4184          FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4185    }
4186    if (sample)
4187       IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
4188 
4189    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4190       rgba[0][j] = value[0].f[j];
4191       rgba[1][j] = value[1].f[j];
4192       rgba[2][j] = value[2].f[j];
4193       rgba[3][j] = value[3].f[j];
4194    }
4195    if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4196       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4197          rgba2[0][j] = value2[0].f[j];
4198          rgba2[1][j] = value2[1].f[j];
4199          rgba2[2][j] = value2[2].f[j];
4200          rgba2[3][j] = value2[3].f[j];
4201       }
4202    }
4203 
4204    mach->Image->op(mach->Image, &params, inst->Instruction.Opcode,
4205                    r[0].i, r[1].i, r[2].i, sample_r.i,
4206                    rgba, rgba2);
4207 
4208    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4209       r[0].f[j] = rgba[0][j];
4210       r[1].f[j] = rgba[1][j];
4211       r[2].f[j] = rgba[2][j];
4212       r[3].f[j] = rgba[3][j];
4213    }
4214    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4215       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4216          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4217       }
4218    }
4219 }
4220 
4221 static void
exec_atomop_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4222 exec_atomop_buf(struct tgsi_exec_machine *mach,
4223                 const struct tgsi_full_instruction *inst)
4224 {
4225    union tgsi_exec_channel r[4];
4226    union tgsi_exec_channel value[4], value2[4];
4227    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4228    float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4229    struct tgsi_buffer_params params;
4230    int i, j;
4231    uint unit, chan;
4232    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4233 
4234    unit = fetch_sampler_unit(mach, inst, 0);
4235 
4236    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4237    params.unit = unit;
4238    params.writemask = inst->Dst[0].Register.WriteMask;
4239 
4240    IFETCH(&r[0], 1, TGSI_CHAN_X);
4241 
4242    for (i = 0; i < 4; i++) {
4243       FETCH(&value[i], 2, TGSI_CHAN_X + i);
4244       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4245          FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4246    }
4247 
4248    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4249       rgba[0][j] = value[0].f[j];
4250       rgba[1][j] = value[1].f[j];
4251       rgba[2][j] = value[2].f[j];
4252       rgba[3][j] = value[3].f[j];
4253    }
4254    if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4255       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4256          rgba2[0][j] = value2[0].f[j];
4257          rgba2[1][j] = value2[1].f[j];
4258          rgba2[2][j] = value2[2].f[j];
4259          rgba2[3][j] = value2[3].f[j];
4260       }
4261    }
4262 
4263    mach->Buffer->op(mach->Buffer, &params, inst->Instruction.Opcode,
4264                    r[0].i,
4265                    rgba, rgba2);
4266 
4267    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4268       r[0].f[j] = rgba[0][j];
4269       r[1].f[j] = rgba[1][j];
4270       r[2].f[j] = rgba[2][j];
4271       r[3].f[j] = rgba[3][j];
4272    }
4273    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4274       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4275          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4276       }
4277    }
4278 }
4279 
4280 static void
exec_atomop_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4281 exec_atomop_mem(struct tgsi_exec_machine *mach,
4282                 const struct tgsi_full_instruction *inst)
4283 {
4284    union tgsi_exec_channel r[4];
4285    union tgsi_exec_channel value[4], value2[4];
4286    char *ptr = mach->LocalMem;
4287    uint32_t val;
4288    uint chan, i;
4289    uint32_t offset;
4290    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4291    int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4292    IFETCH(&r[0], 1, TGSI_CHAN_X);
4293 
4294    if (r[0].u[0] >= mach->LocalMemSize)
4295       return;
4296 
4297    offset = r[0].u[0];
4298    ptr += offset;
4299    for (i = 0; i < 4; i++) {
4300       FETCH(&value[i], 2, TGSI_CHAN_X + i);
4301       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4302          FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4303    }
4304 
4305    memcpy(&r[0].u[0], ptr, 4);
4306    val = r[0].u[0];
4307    switch (inst->Instruction.Opcode) {
4308    case TGSI_OPCODE_ATOMUADD:
4309       val += value[0].u[0];
4310       break;
4311    case TGSI_OPCODE_ATOMXOR:
4312       val ^= value[0].u[0];
4313       break;
4314    case TGSI_OPCODE_ATOMOR:
4315       val |= value[0].u[0];
4316       break;
4317    case TGSI_OPCODE_ATOMAND:
4318       val &= value[0].u[0];
4319       break;
4320    case TGSI_OPCODE_ATOMUMIN:
4321       val = MIN2(val, value[0].u[0]);
4322       break;
4323    case TGSI_OPCODE_ATOMUMAX:
4324       val = MAX2(val, value[0].u[0]);
4325       break;
4326    case TGSI_OPCODE_ATOMIMIN:
4327       val = MIN2(r[0].i[0], value[0].i[0]);
4328       break;
4329    case TGSI_OPCODE_ATOMIMAX:
4330       val = MAX2(r[0].i[0], value[0].i[0]);
4331       break;
4332    case TGSI_OPCODE_ATOMXCHG:
4333       val = value[0].i[0];
4334       break;
4335    case TGSI_OPCODE_ATOMCAS:
4336       if (val == value[0].u[0])
4337          val = value2[0].u[0];
4338       break;
4339    case TGSI_OPCODE_ATOMFADD:
4340       val = fui(r[0].f[0] + value[0].f[0]);
4341       break;
4342    default:
4343       break;
4344    }
4345    for (i = 0; i < TGSI_QUAD_SIZE; i++)
4346       if (execmask & (1 << i))
4347          memcpy(ptr, &val, 4);
4348 
4349    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4350       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4351          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4352       }
4353    }
4354 }
4355 
4356 static void
exec_atomop(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4357 exec_atomop(struct tgsi_exec_machine *mach,
4358             const struct tgsi_full_instruction *inst)
4359 {
4360    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4361       exec_atomop_img(mach, inst);
4362    else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4363       exec_atomop_buf(mach, inst);
4364    else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4365       exec_atomop_mem(mach, inst);
4366 }
4367 
4368 static void
exec_resq_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4369 exec_resq_img(struct tgsi_exec_machine *mach,
4370               const struct tgsi_full_instruction *inst)
4371 {
4372    int result[4];
4373    union tgsi_exec_channel r[4];
4374    uint unit;
4375    int i, chan, j;
4376    struct tgsi_image_params params;
4377    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4378 
4379    unit = fetch_sampler_unit(mach, inst, 0);
4380 
4381    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4382    params.unit = unit;
4383    params.tgsi_tex_instr = inst->Memory.Texture;
4384    params.format = inst->Memory.Format;
4385 
4386    mach->Image->get_dims(mach->Image, &params, result);
4387 
4388    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4389       for (j = 0; j < 4; j++) {
4390          r[j].i[i] = result[j];
4391       }
4392    }
4393 
4394    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4395       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4396          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4397                     TGSI_EXEC_DATA_INT);
4398       }
4399    }
4400 }
4401 
4402 static void
exec_resq_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4403 exec_resq_buf(struct tgsi_exec_machine *mach,
4404               const struct tgsi_full_instruction *inst)
4405 {
4406    int result;
4407    union tgsi_exec_channel r[4];
4408    uint unit;
4409    int i, chan;
4410    struct tgsi_buffer_params params;
4411    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4412 
4413    unit = fetch_sampler_unit(mach, inst, 0);
4414 
4415    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4416    params.unit = unit;
4417 
4418    mach->Buffer->get_dims(mach->Buffer, &params, &result);
4419 
4420    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4421       r[0].i[i] = result;
4422    }
4423 
4424    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4425       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4426          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4427                     TGSI_EXEC_DATA_INT);
4428       }
4429    }
4430 }
4431 
4432 static void
exec_resq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4433 exec_resq(struct tgsi_exec_machine *mach,
4434           const struct tgsi_full_instruction *inst)
4435 {
4436    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4437       exec_resq_img(mach, inst);
4438    else
4439       exec_resq_buf(mach, inst);
4440 }
4441 
4442 static void
micro_f2u64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4443 micro_f2u64(union tgsi_double_channel *dst,
4444             const union tgsi_exec_channel *src)
4445 {
4446    dst->u64[0] = (uint64_t)src->f[0];
4447    dst->u64[1] = (uint64_t)src->f[1];
4448    dst->u64[2] = (uint64_t)src->f[2];
4449    dst->u64[3] = (uint64_t)src->f[3];
4450 }
4451 
4452 static void
micro_f2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4453 micro_f2i64(union tgsi_double_channel *dst,
4454             const union tgsi_exec_channel *src)
4455 {
4456    dst->i64[0] = (int64_t)src->f[0];
4457    dst->i64[1] = (int64_t)src->f[1];
4458    dst->i64[2] = (int64_t)src->f[2];
4459    dst->i64[3] = (int64_t)src->f[3];
4460 }
4461 
4462 static void
micro_u2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4463 micro_u2i64(union tgsi_double_channel *dst,
4464             const union tgsi_exec_channel *src)
4465 {
4466    dst->u64[0] = (uint64_t)src->u[0];
4467    dst->u64[1] = (uint64_t)src->u[1];
4468    dst->u64[2] = (uint64_t)src->u[2];
4469    dst->u64[3] = (uint64_t)src->u[3];
4470 }
4471 
4472 static void
micro_i2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4473 micro_i2i64(union tgsi_double_channel *dst,
4474             const union tgsi_exec_channel *src)
4475 {
4476    dst->i64[0] = (int64_t)src->i[0];
4477    dst->i64[1] = (int64_t)src->i[1];
4478    dst->i64[2] = (int64_t)src->i[2];
4479    dst->i64[3] = (int64_t)src->i[3];
4480 }
4481 
4482 static void
micro_d2u64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4483 micro_d2u64(union tgsi_double_channel *dst,
4484            const union tgsi_double_channel *src)
4485 {
4486    dst->u64[0] = (uint64_t)src->d[0];
4487    dst->u64[1] = (uint64_t)src->d[1];
4488    dst->u64[2] = (uint64_t)src->d[2];
4489    dst->u64[3] = (uint64_t)src->d[3];
4490 }
4491 
4492 static void
micro_d2i64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4493 micro_d2i64(union tgsi_double_channel *dst,
4494            const union tgsi_double_channel *src)
4495 {
4496    dst->i64[0] = (int64_t)src->d[0];
4497    dst->i64[1] = (int64_t)src->d[1];
4498    dst->i64[2] = (int64_t)src->d[2];
4499    dst->i64[3] = (int64_t)src->d[3];
4500 }
4501 
4502 static void
micro_u642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4503 micro_u642d(union tgsi_double_channel *dst,
4504            const union tgsi_double_channel *src)
4505 {
4506    dst->d[0] = (double)src->u64[0];
4507    dst->d[1] = (double)src->u64[1];
4508    dst->d[2] = (double)src->u64[2];
4509    dst->d[3] = (double)src->u64[3];
4510 }
4511 
4512 static void
micro_i642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4513 micro_i642d(union tgsi_double_channel *dst,
4514            const union tgsi_double_channel *src)
4515 {
4516    dst->d[0] = (double)src->i64[0];
4517    dst->d[1] = (double)src->i64[1];
4518    dst->d[2] = (double)src->i64[2];
4519    dst->d[3] = (double)src->i64[3];
4520 }
4521 
4522 static void
micro_u642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4523 micro_u642f(union tgsi_exec_channel *dst,
4524             const union tgsi_double_channel *src)
4525 {
4526    dst->f[0] = (float)src->u64[0];
4527    dst->f[1] = (float)src->u64[1];
4528    dst->f[2] = (float)src->u64[2];
4529    dst->f[3] = (float)src->u64[3];
4530 }
4531 
4532 static void
micro_i642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4533 micro_i642f(union tgsi_exec_channel *dst,
4534             const union tgsi_double_channel *src)
4535 {
4536    dst->f[0] = (float)src->i64[0];
4537    dst->f[1] = (float)src->i64[1];
4538    dst->f[2] = (float)src->i64[2];
4539    dst->f[3] = (float)src->i64[3];
4540 }
4541 
4542 static void
exec_t_2_64(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_s op,enum tgsi_exec_datatype src_datatype)4543 exec_t_2_64(struct tgsi_exec_machine *mach,
4544           const struct tgsi_full_instruction *inst,
4545           micro_dop_s op,
4546           enum tgsi_exec_datatype src_datatype)
4547 {
4548    union tgsi_exec_channel src;
4549    union tgsi_double_channel dst;
4550 
4551    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
4552       fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
4553       op(&dst, &src);
4554       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
4555    }
4556    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
4557       fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
4558       op(&dst, &src);
4559       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
4560    }
4561 }
4562 
4563 static void
exec_64_2_t(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_sop_d op,enum tgsi_exec_datatype dst_datatype)4564 exec_64_2_t(struct tgsi_exec_machine *mach,
4565             const struct tgsi_full_instruction *inst,
4566             micro_sop_d op,
4567             enum tgsi_exec_datatype dst_datatype)
4568 {
4569    union tgsi_double_channel src;
4570    union tgsi_exec_channel dst;
4571    int wm = inst->Dst[0].Register.WriteMask;
4572    int i;
4573    int bit;
4574    for (i = 0; i < 2; i++) {
4575       bit = ffs(wm);
4576       if (bit) {
4577          wm &= ~(1 << (bit - 1));
4578          if (i == 0)
4579             fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
4580          else
4581             fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
4582          op(&dst, &src);
4583          store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
4584       }
4585    }
4586 }
4587 
4588 static void
micro_i2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4589 micro_i2f(union tgsi_exec_channel *dst,
4590           const union tgsi_exec_channel *src)
4591 {
4592    dst->f[0] = (float)src->i[0];
4593    dst->f[1] = (float)src->i[1];
4594    dst->f[2] = (float)src->i[2];
4595    dst->f[3] = (float)src->i[3];
4596 }
4597 
4598 static void
micro_not(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4599 micro_not(union tgsi_exec_channel *dst,
4600           const union tgsi_exec_channel *src)
4601 {
4602    dst->u[0] = ~src->u[0];
4603    dst->u[1] = ~src->u[1];
4604    dst->u[2] = ~src->u[2];
4605    dst->u[3] = ~src->u[3];
4606 }
4607 
4608 static void
micro_shl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4609 micro_shl(union tgsi_exec_channel *dst,
4610           const union tgsi_exec_channel *src0,
4611           const union tgsi_exec_channel *src1)
4612 {
4613    unsigned masked_count;
4614    masked_count = src1->u[0] & 0x1f;
4615    dst->u[0] = src0->u[0] << masked_count;
4616    masked_count = src1->u[1] & 0x1f;
4617    dst->u[1] = src0->u[1] << masked_count;
4618    masked_count = src1->u[2] & 0x1f;
4619    dst->u[2] = src0->u[2] << masked_count;
4620    masked_count = src1->u[3] & 0x1f;
4621    dst->u[3] = src0->u[3] << masked_count;
4622 }
4623 
4624 static void
micro_and(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4625 micro_and(union tgsi_exec_channel *dst,
4626           const union tgsi_exec_channel *src0,
4627           const union tgsi_exec_channel *src1)
4628 {
4629    dst->u[0] = src0->u[0] & src1->u[0];
4630    dst->u[1] = src0->u[1] & src1->u[1];
4631    dst->u[2] = src0->u[2] & src1->u[2];
4632    dst->u[3] = src0->u[3] & src1->u[3];
4633 }
4634 
4635 static void
micro_or(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4636 micro_or(union tgsi_exec_channel *dst,
4637          const union tgsi_exec_channel *src0,
4638          const union tgsi_exec_channel *src1)
4639 {
4640    dst->u[0] = src0->u[0] | src1->u[0];
4641    dst->u[1] = src0->u[1] | src1->u[1];
4642    dst->u[2] = src0->u[2] | src1->u[2];
4643    dst->u[3] = src0->u[3] | src1->u[3];
4644 }
4645 
4646 static void
micro_xor(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4647 micro_xor(union tgsi_exec_channel *dst,
4648           const union tgsi_exec_channel *src0,
4649           const union tgsi_exec_channel *src1)
4650 {
4651    dst->u[0] = src0->u[0] ^ src1->u[0];
4652    dst->u[1] = src0->u[1] ^ src1->u[1];
4653    dst->u[2] = src0->u[2] ^ src1->u[2];
4654    dst->u[3] = src0->u[3] ^ src1->u[3];
4655 }
4656 
4657 static void
micro_mod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4658 micro_mod(union tgsi_exec_channel *dst,
4659           const union tgsi_exec_channel *src0,
4660           const union tgsi_exec_channel *src1)
4661 {
4662    dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;
4663    dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;
4664    dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;
4665    dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;
4666 }
4667 
4668 static void
micro_f2i(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4669 micro_f2i(union tgsi_exec_channel *dst,
4670           const union tgsi_exec_channel *src)
4671 {
4672    dst->i[0] = (int)src->f[0];
4673    dst->i[1] = (int)src->f[1];
4674    dst->i[2] = (int)src->f[2];
4675    dst->i[3] = (int)src->f[3];
4676 }
4677 
4678 static void
micro_fseq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4679 micro_fseq(union tgsi_exec_channel *dst,
4680            const union tgsi_exec_channel *src0,
4681            const union tgsi_exec_channel *src1)
4682 {
4683    dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4684    dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4685    dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4686    dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4687 }
4688 
4689 static void
micro_fsge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4690 micro_fsge(union tgsi_exec_channel *dst,
4691            const union tgsi_exec_channel *src0,
4692            const union tgsi_exec_channel *src1)
4693 {
4694    dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4695    dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4696    dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4697    dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4698 }
4699 
4700 static void
micro_fslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4701 micro_fslt(union tgsi_exec_channel *dst,
4702            const union tgsi_exec_channel *src0,
4703            const union tgsi_exec_channel *src1)
4704 {
4705    dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4706    dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4707    dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4708    dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4709 }
4710 
4711 static void
micro_fsne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4712 micro_fsne(union tgsi_exec_channel *dst,
4713            const union tgsi_exec_channel *src0,
4714            const union tgsi_exec_channel *src1)
4715 {
4716    dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4717    dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4718    dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4719    dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4720 }
4721 
4722 static void
micro_idiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4723 micro_idiv(union tgsi_exec_channel *dst,
4724            const union tgsi_exec_channel *src0,
4725            const union tgsi_exec_channel *src1)
4726 {
4727    dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4728    dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4729    dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4730    dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
4731 }
4732 
4733 static void
micro_imax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4734 micro_imax(union tgsi_exec_channel *dst,
4735            const union tgsi_exec_channel *src0,
4736            const union tgsi_exec_channel *src1)
4737 {
4738    dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4739    dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4740    dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4741    dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4742 }
4743 
4744 static void
micro_imin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4745 micro_imin(union tgsi_exec_channel *dst,
4746            const union tgsi_exec_channel *src0,
4747            const union tgsi_exec_channel *src1)
4748 {
4749    dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4750    dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4751    dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4752    dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4753 }
4754 
4755 static void
micro_isge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4756 micro_isge(union tgsi_exec_channel *dst,
4757            const union tgsi_exec_channel *src0,
4758            const union tgsi_exec_channel *src1)
4759 {
4760    dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4761    dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4762    dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4763    dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
4764 }
4765 
4766 static void
micro_ishr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4767 micro_ishr(union tgsi_exec_channel *dst,
4768            const union tgsi_exec_channel *src0,
4769            const union tgsi_exec_channel *src1)
4770 {
4771    unsigned masked_count;
4772    masked_count = src1->i[0] & 0x1f;
4773    dst->i[0] = src0->i[0] >> masked_count;
4774    masked_count = src1->i[1] & 0x1f;
4775    dst->i[1] = src0->i[1] >> masked_count;
4776    masked_count = src1->i[2] & 0x1f;
4777    dst->i[2] = src0->i[2] >> masked_count;
4778    masked_count = src1->i[3] & 0x1f;
4779    dst->i[3] = src0->i[3] >> masked_count;
4780 }
4781 
4782 static void
micro_islt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4783 micro_islt(union tgsi_exec_channel *dst,
4784            const union tgsi_exec_channel *src0,
4785            const union tgsi_exec_channel *src1)
4786 {
4787    dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4788    dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4789    dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4790    dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4791 }
4792 
4793 static void
micro_f2u(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4794 micro_f2u(union tgsi_exec_channel *dst,
4795           const union tgsi_exec_channel *src)
4796 {
4797    dst->u[0] = (uint)src->f[0];
4798    dst->u[1] = (uint)src->f[1];
4799    dst->u[2] = (uint)src->f[2];
4800    dst->u[3] = (uint)src->f[3];
4801 }
4802 
4803 static void
micro_u2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4804 micro_u2f(union tgsi_exec_channel *dst,
4805           const union tgsi_exec_channel *src)
4806 {
4807    dst->f[0] = (float)src->u[0];
4808    dst->f[1] = (float)src->u[1];
4809    dst->f[2] = (float)src->u[2];
4810    dst->f[3] = (float)src->u[3];
4811 }
4812 
4813 static void
micro_uadd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4814 micro_uadd(union tgsi_exec_channel *dst,
4815            const union tgsi_exec_channel *src0,
4816            const union tgsi_exec_channel *src1)
4817 {
4818    dst->u[0] = src0->u[0] + src1->u[0];
4819    dst->u[1] = src0->u[1] + src1->u[1];
4820    dst->u[2] = src0->u[2] + src1->u[2];
4821    dst->u[3] = src0->u[3] + src1->u[3];
4822 }
4823 
4824 static void
micro_udiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4825 micro_udiv(union tgsi_exec_channel *dst,
4826            const union tgsi_exec_channel *src0,
4827            const union tgsi_exec_channel *src1)
4828 {
4829    dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4830    dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4831    dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4832    dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4833 }
4834 
4835 static void
micro_umad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4836 micro_umad(union tgsi_exec_channel *dst,
4837            const union tgsi_exec_channel *src0,
4838            const union tgsi_exec_channel *src1,
4839            const union tgsi_exec_channel *src2)
4840 {
4841    dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4842    dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4843    dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4844    dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4845 }
4846 
4847 static void
micro_umax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4848 micro_umax(union tgsi_exec_channel *dst,
4849            const union tgsi_exec_channel *src0,
4850            const union tgsi_exec_channel *src1)
4851 {
4852    dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4853    dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4854    dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4855    dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4856 }
4857 
4858 static void
micro_umin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4859 micro_umin(union tgsi_exec_channel *dst,
4860            const union tgsi_exec_channel *src0,
4861            const union tgsi_exec_channel *src1)
4862 {
4863    dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4864    dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4865    dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4866    dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4867 }
4868 
4869 static void
micro_umod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4870 micro_umod(union tgsi_exec_channel *dst,
4871            const union tgsi_exec_channel *src0,
4872            const union tgsi_exec_channel *src1)
4873 {
4874    dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4875    dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4876    dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4877    dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4878 }
4879 
4880 static void
micro_umul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4881 micro_umul(union tgsi_exec_channel *dst,
4882            const union tgsi_exec_channel *src0,
4883            const union tgsi_exec_channel *src1)
4884 {
4885    dst->u[0] = src0->u[0] * src1->u[0];
4886    dst->u[1] = src0->u[1] * src1->u[1];
4887    dst->u[2] = src0->u[2] * src1->u[2];
4888    dst->u[3] = src0->u[3] * src1->u[3];
4889 }
4890 
4891 static void
micro_imul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4892 micro_imul_hi(union tgsi_exec_channel *dst,
4893               const union tgsi_exec_channel *src0,
4894               const union tgsi_exec_channel *src1)
4895 {
4896 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4897    dst->i[0] = I64M(src0->i[0], src1->i[0]);
4898    dst->i[1] = I64M(src0->i[1], src1->i[1]);
4899    dst->i[2] = I64M(src0->i[2], src1->i[2]);
4900    dst->i[3] = I64M(src0->i[3], src1->i[3]);
4901 #undef I64M
4902 }
4903 
4904 static void
micro_umul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4905 micro_umul_hi(union tgsi_exec_channel *dst,
4906               const union tgsi_exec_channel *src0,
4907               const union tgsi_exec_channel *src1)
4908 {
4909 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4910    dst->u[0] = U64M(src0->u[0], src1->u[0]);
4911    dst->u[1] = U64M(src0->u[1], src1->u[1]);
4912    dst->u[2] = U64M(src0->u[2], src1->u[2]);
4913    dst->u[3] = U64M(src0->u[3], src1->u[3]);
4914 #undef U64M
4915 }
4916 
4917 static void
micro_useq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4918 micro_useq(union tgsi_exec_channel *dst,
4919            const union tgsi_exec_channel *src0,
4920            const union tgsi_exec_channel *src1)
4921 {
4922    dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4923    dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4924    dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4925    dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4926 }
4927 
4928 static void
micro_usge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4929 micro_usge(union tgsi_exec_channel *dst,
4930            const union tgsi_exec_channel *src0,
4931            const union tgsi_exec_channel *src1)
4932 {
4933    dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4934    dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4935    dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4936    dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4937 }
4938 
4939 static void
micro_ushr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4940 micro_ushr(union tgsi_exec_channel *dst,
4941            const union tgsi_exec_channel *src0,
4942            const union tgsi_exec_channel *src1)
4943 {
4944    unsigned masked_count;
4945    masked_count = src1->u[0] & 0x1f;
4946    dst->u[0] = src0->u[0] >> masked_count;
4947    masked_count = src1->u[1] & 0x1f;
4948    dst->u[1] = src0->u[1] >> masked_count;
4949    masked_count = src1->u[2] & 0x1f;
4950    dst->u[2] = src0->u[2] >> masked_count;
4951    masked_count = src1->u[3] & 0x1f;
4952    dst->u[3] = src0->u[3] >> masked_count;
4953 }
4954 
4955 static void
micro_uslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4956 micro_uslt(union tgsi_exec_channel *dst,
4957            const union tgsi_exec_channel *src0,
4958            const union tgsi_exec_channel *src1)
4959 {
4960    dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4961    dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4962    dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4963    dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4964 }
4965 
4966 static void
micro_usne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4967 micro_usne(union tgsi_exec_channel *dst,
4968            const union tgsi_exec_channel *src0,
4969            const union tgsi_exec_channel *src1)
4970 {
4971    dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4972    dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4973    dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4974    dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
4975 }
4976 
4977 static void
micro_uarl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4978 micro_uarl(union tgsi_exec_channel *dst,
4979            const union tgsi_exec_channel *src)
4980 {
4981    dst->i[0] = src->u[0];
4982    dst->i[1] = src->u[1];
4983    dst->i[2] = src->u[2];
4984    dst->i[3] = src->u[3];
4985 }
4986 
4987 /**
4988  * Signed bitfield extract (i.e. sign-extend the extracted bits)
4989  */
4990 static void
micro_ibfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4991 micro_ibfe(union tgsi_exec_channel *dst,
4992            const union tgsi_exec_channel *src0,
4993            const union tgsi_exec_channel *src1,
4994            const union tgsi_exec_channel *src2)
4995 {
4996    int i;
4997    for (i = 0; i < 4; i++) {
4998       int width = src2->i[i];
4999       int offset = src1->i[i] & 0x1f;
5000       if (width == 32 && offset == 0) {
5001          dst->i[i] = src0->i[i];
5002          continue;
5003       }
5004       width &= 0x1f;
5005       if (width == 0)
5006          dst->i[i] = 0;
5007       else if (width + offset < 32)
5008          dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
5009       else
5010          dst->i[i] = src0->i[i] >> offset;
5011    }
5012 }
5013 
5014 /**
5015  * Unsigned bitfield extract
5016  */
5017 static void
micro_ubfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)5018 micro_ubfe(union tgsi_exec_channel *dst,
5019            const union tgsi_exec_channel *src0,
5020            const union tgsi_exec_channel *src1,
5021            const union tgsi_exec_channel *src2)
5022 {
5023    int i;
5024    for (i = 0; i < 4; i++) {
5025       int width = src2->u[i];
5026       int offset = src1->u[i] & 0x1f;
5027       if (width == 32 && offset == 0) {
5028          dst->u[i] = src0->u[i];
5029          continue;
5030       }
5031       width &= 0x1f;
5032       if (width == 0)
5033          dst->u[i] = 0;
5034       else if (width + offset < 32)
5035          dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
5036       else
5037          dst->u[i] = src0->u[i] >> offset;
5038    }
5039 }
5040 
5041 /**
5042  * Bitfield insert: copy low bits from src1 into a region of src0.
5043  */
5044 static void
micro_bfi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)5045 micro_bfi(union tgsi_exec_channel *dst,
5046           const union tgsi_exec_channel *src0,
5047           const union tgsi_exec_channel *src1,
5048           const union tgsi_exec_channel *src2,
5049           const union tgsi_exec_channel *src3)
5050 {
5051    int i;
5052    for (i = 0; i < 4; i++) {
5053       int width = src3->u[i];
5054       int offset = src2->u[i] & 0x1f;
5055       if (width == 32) {
5056          dst->u[i] = src1->u[i];
5057       } else {
5058          int bitmask = ((1 << width) - 1) << offset;
5059          dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
5060       }
5061    }
5062 }
5063 
5064 static void
micro_brev(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5065 micro_brev(union tgsi_exec_channel *dst,
5066            const union tgsi_exec_channel *src)
5067 {
5068    dst->u[0] = util_bitreverse(src->u[0]);
5069    dst->u[1] = util_bitreverse(src->u[1]);
5070    dst->u[2] = util_bitreverse(src->u[2]);
5071    dst->u[3] = util_bitreverse(src->u[3]);
5072 }
5073 
5074 static void
micro_popc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5075 micro_popc(union tgsi_exec_channel *dst,
5076            const union tgsi_exec_channel *src)
5077 {
5078    dst->u[0] = util_bitcount(src->u[0]);
5079    dst->u[1] = util_bitcount(src->u[1]);
5080    dst->u[2] = util_bitcount(src->u[2]);
5081    dst->u[3] = util_bitcount(src->u[3]);
5082 }
5083 
5084 static void
micro_lsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5085 micro_lsb(union tgsi_exec_channel *dst,
5086           const union tgsi_exec_channel *src)
5087 {
5088    dst->i[0] = ffs(src->u[0]) - 1;
5089    dst->i[1] = ffs(src->u[1]) - 1;
5090    dst->i[2] = ffs(src->u[2]) - 1;
5091    dst->i[3] = ffs(src->u[3]) - 1;
5092 }
5093 
5094 static void
micro_imsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5095 micro_imsb(union tgsi_exec_channel *dst,
5096            const union tgsi_exec_channel *src)
5097 {
5098    dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
5099    dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
5100    dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
5101    dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
5102 }
5103 
5104 static void
micro_umsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5105 micro_umsb(union tgsi_exec_channel *dst,
5106            const union tgsi_exec_channel *src)
5107 {
5108    dst->i[0] = util_last_bit(src->u[0]) - 1;
5109    dst->i[1] = util_last_bit(src->u[1]) - 1;
5110    dst->i[2] = util_last_bit(src->u[2]) - 1;
5111    dst->i[3] = util_last_bit(src->u[3]) - 1;
5112 }
5113 
5114 
5115 static void
exec_interp_at_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)5116 exec_interp_at_sample(struct tgsi_exec_machine *mach,
5117                       const struct tgsi_full_instruction *inst)
5118 {
5119    union tgsi_exec_channel index;
5120    union tgsi_exec_channel index2D;
5121    union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
5122    const struct tgsi_full_src_register *reg = &inst->Src[0];
5123 
5124    assert(reg->Register.File == TGSI_FILE_INPUT);
5125    assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE);
5126 
5127    get_index_registers(mach, reg, &index, &index2D);
5128    float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX];
5129 
5130    /* Short cut: sample 0 is like a normal fetch */
5131    for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
5132       if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
5133          continue;
5134 
5135       fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
5136                              &result[chan]);
5137       if (sample != 0.0f) {
5138 
5139       /* TODO: define the samples > 0, but so far we only do fake MSAA */
5140          float x = 0;
5141          float y = 0;
5142 
5143          unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan];
5144          assert(pos >= 0);
5145          assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
5146          mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]);
5147       }
5148       store_dest(mach, &result[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
5149    }
5150 }
5151 
5152 
5153 static void
exec_interp_at_offset(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)5154 exec_interp_at_offset(struct tgsi_exec_machine *mach,
5155                       const struct tgsi_full_instruction *inst)
5156 {
5157    union tgsi_exec_channel index;
5158    union tgsi_exec_channel index2D;
5159    union tgsi_exec_channel ofsx;
5160    union tgsi_exec_channel ofsy;
5161    const struct tgsi_full_src_register *reg = &inst->Src[0];
5162 
5163    assert(reg->Register.File == TGSI_FILE_INPUT);
5164 
5165    get_index_registers(mach, reg, &index, &index2D);
5166    unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0];
5167 
5168    fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
5169    fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
5170 
5171    for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
5172       if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
5173          continue;
5174       union tgsi_exec_channel result;
5175       fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result);
5176       mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result);
5177       store_dest(mach, &result, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
5178    }
5179 }
5180 
5181 
5182 static void
exec_interp_at_centroid(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)5183 exec_interp_at_centroid(struct tgsi_exec_machine *mach,
5184                         const struct tgsi_full_instruction *inst)
5185 {
5186    union tgsi_exec_channel index;
5187    union tgsi_exec_channel index2D;
5188    union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
5189    const struct tgsi_full_src_register *reg = &inst->Src[0];
5190 
5191    assert(reg->Register.File == TGSI_FILE_INPUT);
5192    get_index_registers(mach, reg, &index, &index2D);
5193 
5194    for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
5195       if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
5196          continue;
5197 
5198       /* Here we should add the change to use a sample that lies within the
5199        * primitive (Section 15.2):
5200        *
5201        * "When interpolating variables declared using centroid in ,
5202        * the variable is sampled at a location within the pixel covered
5203        * by the primitive generating the fragment.
5204        * ...
5205        * The built-in functions interpolateAtCentroid ... will sample
5206        * variables as though they were declared with the centroid ...
5207        * qualifier[s]."
5208        *
5209        * Since we only support 1 sample currently, this is just a pass-through.
5210        */
5211       fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
5212                              &result[chan]);
5213       store_dest(mach, &result[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
5214    }
5215 
5216 }
5217 
5218 
5219 /**
5220  * Execute a TGSI instruction.
5221  * Returns TRUE if a barrier instruction is hit,
5222  * otherwise FALSE.
5223  */
5224 static boolean
exec_instruction(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int * pc)5225 exec_instruction(
5226    struct tgsi_exec_machine *mach,
5227    const struct tgsi_full_instruction *inst,
5228    int *pc )
5229 {
5230    union tgsi_exec_channel r[10];
5231 
5232    (*pc)++;
5233 
5234    switch (inst->Instruction.Opcode) {
5235    case TGSI_OPCODE_ARL:
5236       exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5237       break;
5238 
5239    case TGSI_OPCODE_MOV:
5240       exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5241       break;
5242 
5243    case TGSI_OPCODE_LIT:
5244       exec_lit(mach, inst);
5245       break;
5246 
5247    case TGSI_OPCODE_RCP:
5248       exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5249       break;
5250 
5251    case TGSI_OPCODE_RSQ:
5252       exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5253       break;
5254 
5255    case TGSI_OPCODE_EXP:
5256       exec_exp(mach, inst);
5257       break;
5258 
5259    case TGSI_OPCODE_LOG:
5260       exec_log(mach, inst);
5261       break;
5262 
5263    case TGSI_OPCODE_MUL:
5264       exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5265       break;
5266 
5267    case TGSI_OPCODE_ADD:
5268       exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5269       break;
5270 
5271    case TGSI_OPCODE_DP3:
5272       exec_dp3(mach, inst);
5273       break;
5274 
5275    case TGSI_OPCODE_DP4:
5276       exec_dp4(mach, inst);
5277       break;
5278 
5279    case TGSI_OPCODE_DST:
5280       exec_dst(mach, inst);
5281       break;
5282 
5283    case TGSI_OPCODE_MIN:
5284       exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5285       break;
5286 
5287    case TGSI_OPCODE_MAX:
5288       exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5289       break;
5290 
5291    case TGSI_OPCODE_SLT:
5292       exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5293       break;
5294 
5295    case TGSI_OPCODE_SGE:
5296       exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5297       break;
5298 
5299    case TGSI_OPCODE_MAD:
5300       exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5301       break;
5302 
5303    case TGSI_OPCODE_LRP:
5304       exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5305       break;
5306 
5307    case TGSI_OPCODE_SQRT:
5308       exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5309       break;
5310 
5311    case TGSI_OPCODE_FRC:
5312       exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5313       break;
5314 
5315    case TGSI_OPCODE_FLR:
5316       exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5317       break;
5318 
5319    case TGSI_OPCODE_ROUND:
5320       exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5321       break;
5322 
5323    case TGSI_OPCODE_EX2:
5324       exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5325       break;
5326 
5327    case TGSI_OPCODE_LG2:
5328       exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5329       break;
5330 
5331    case TGSI_OPCODE_POW:
5332       exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5333       break;
5334 
5335    case TGSI_OPCODE_LDEXP:
5336       exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5337       break;
5338 
5339    case TGSI_OPCODE_COS:
5340       exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5341       break;
5342 
5343    case TGSI_OPCODE_DDX_FINE:
5344       exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5345       break;
5346 
5347    case TGSI_OPCODE_DDX:
5348       exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5349       break;
5350 
5351    case TGSI_OPCODE_DDY_FINE:
5352       exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5353       break;
5354 
5355    case TGSI_OPCODE_DDY:
5356       exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5357       break;
5358 
5359    case TGSI_OPCODE_KILL:
5360       exec_kill (mach);
5361       break;
5362 
5363    case TGSI_OPCODE_KILL_IF:
5364       exec_kill_if (mach, inst);
5365       break;
5366 
5367    case TGSI_OPCODE_PK2H:
5368       exec_pk2h(mach, inst);
5369       break;
5370 
5371    case TGSI_OPCODE_PK2US:
5372       assert (0);
5373       break;
5374 
5375    case TGSI_OPCODE_PK4B:
5376       assert (0);
5377       break;
5378 
5379    case TGSI_OPCODE_PK4UB:
5380       assert (0);
5381       break;
5382 
5383    case TGSI_OPCODE_SEQ:
5384       exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5385       break;
5386 
5387    case TGSI_OPCODE_SGT:
5388       exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5389       break;
5390 
5391    case TGSI_OPCODE_SIN:
5392       exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5393       break;
5394 
5395    case TGSI_OPCODE_SLE:
5396       exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5397       break;
5398 
5399    case TGSI_OPCODE_SNE:
5400       exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5401       break;
5402 
5403    case TGSI_OPCODE_TEX:
5404       /* simple texture lookup */
5405       /* src[0] = texcoord */
5406       /* src[1] = sampler unit */
5407       exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
5408       break;
5409 
5410    case TGSI_OPCODE_TXB:
5411       /* Texture lookup with lod bias */
5412       /* src[0] = texcoord (src[0].w = LOD bias) */
5413       /* src[1] = sampler unit */
5414       exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
5415       break;
5416 
5417    case TGSI_OPCODE_TXD:
5418       /* Texture lookup with explict partial derivatives */
5419       /* src[0] = texcoord */
5420       /* src[1] = d[strq]/dx */
5421       /* src[2] = d[strq]/dy */
5422       /* src[3] = sampler unit */
5423       exec_txd(mach, inst);
5424       break;
5425 
5426    case TGSI_OPCODE_TXL:
5427       /* Texture lookup with explit LOD */
5428       /* src[0] = texcoord (src[0].w = LOD) */
5429       /* src[1] = sampler unit */
5430       exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
5431       break;
5432 
5433    case TGSI_OPCODE_TXP:
5434       /* Texture lookup with projection */
5435       /* src[0] = texcoord (src[0].w = projection) */
5436       /* src[1] = sampler unit */
5437       exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
5438       break;
5439 
5440    case TGSI_OPCODE_TG4:
5441       /* src[0] = texcoord */
5442       /* src[1] = component */
5443       /* src[2] = sampler unit */
5444       exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
5445       break;
5446 
5447    case TGSI_OPCODE_LODQ:
5448       /* src[0] = texcoord */
5449       /* src[1] = sampler unit */
5450       exec_lodq(mach, inst);
5451       break;
5452 
5453    case TGSI_OPCODE_UP2H:
5454       exec_up2h(mach, inst);
5455       break;
5456 
5457    case TGSI_OPCODE_UP2US:
5458       assert (0);
5459       break;
5460 
5461    case TGSI_OPCODE_UP4B:
5462       assert (0);
5463       break;
5464 
5465    case TGSI_OPCODE_UP4UB:
5466       assert (0);
5467       break;
5468 
5469    case TGSI_OPCODE_ARR:
5470       exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5471       break;
5472 
5473    case TGSI_OPCODE_CAL:
5474       /* skip the call if no execution channels are enabled */
5475       if (mach->ExecMask) {
5476          /* do the call */
5477 
5478          /* First, record the depths of the execution stacks.
5479           * This is important for deeply nested/looped return statements.
5480           * We have to unwind the stacks by the correct amount.  For a
5481           * real code generator, we could determine the number of entries
5482           * to pop off each stack with simple static analysis and avoid
5483           * implementing this data structure at run time.
5484           */
5485          mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
5486          mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
5487          mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5488          mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5489          mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
5490          /* note that PC was already incremented above */
5491          mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
5492 
5493          mach->CallStackTop++;
5494 
5495          /* Second, push the Cond, Loop, Cont, Func stacks */
5496          assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5497          assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5498          assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5499          assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5500          assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5501          assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5502 
5503          mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5504          mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5505          mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5506          mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5507          mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5508          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
5509 
5510          /* Finally, jump to the subroutine.  The label is a pointer
5511           * (an instruction number) to the BGNSUB instruction.
5512           */
5513          *pc = inst->Label.Label;
5514          assert(mach->Instructions[*pc].Instruction.Opcode
5515                 == TGSI_OPCODE_BGNSUB);
5516       }
5517       break;
5518 
5519    case TGSI_OPCODE_RET:
5520       mach->FuncMask &= ~mach->ExecMask;
5521       UPDATE_EXEC_MASK(mach);
5522 
5523       if (mach->FuncMask == 0x0) {
5524          /* really return now (otherwise, keep executing */
5525 
5526          if (mach->CallStackTop == 0) {
5527             /* returning from main() */
5528             mach->CondStackTop = 0;
5529             mach->LoopStackTop = 0;
5530             mach->ContStackTop = 0;
5531             mach->LoopLabelStackTop = 0;
5532             mach->SwitchStackTop = 0;
5533             mach->BreakStackTop = 0;
5534             *pc = -1;
5535             return FALSE;
5536          }
5537 
5538          assert(mach->CallStackTop > 0);
5539          mach->CallStackTop--;
5540 
5541          mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5542          mach->CondMask = mach->CondStack[mach->CondStackTop];
5543 
5544          mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5545          mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5546 
5547          mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5548          mach->ContMask = mach->ContStack[mach->ContStackTop];
5549 
5550          mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5551          mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5552 
5553          mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5554          mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5555 
5556          assert(mach->FuncStackTop > 0);
5557          mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5558 
5559          *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5560 
5561          UPDATE_EXEC_MASK(mach);
5562       }
5563       break;
5564 
5565    case TGSI_OPCODE_SSG:
5566       exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5567       break;
5568 
5569    case TGSI_OPCODE_CMP:
5570       exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5571       break;
5572 
5573    case TGSI_OPCODE_DIV:
5574       exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5575       break;
5576 
5577    case TGSI_OPCODE_DP2:
5578       exec_dp2(mach, inst);
5579       break;
5580 
5581    case TGSI_OPCODE_IF:
5582       /* push CondMask */
5583       assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5584       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5585       FETCH( &r[0], 0, TGSI_CHAN_X );
5586       /* update CondMask */
5587       if( ! r[0].f[0] ) {
5588          mach->CondMask &= ~0x1;
5589       }
5590       if( ! r[0].f[1] ) {
5591          mach->CondMask &= ~0x2;
5592       }
5593       if( ! r[0].f[2] ) {
5594          mach->CondMask &= ~0x4;
5595       }
5596       if( ! r[0].f[3] ) {
5597          mach->CondMask &= ~0x8;
5598       }
5599       UPDATE_EXEC_MASK(mach);
5600       /* Todo: If CondMask==0, jump to ELSE */
5601       break;
5602 
5603    case TGSI_OPCODE_UIF:
5604       /* push CondMask */
5605       assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5606       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5607       IFETCH( &r[0], 0, TGSI_CHAN_X );
5608       /* update CondMask */
5609       if( ! r[0].u[0] ) {
5610          mach->CondMask &= ~0x1;
5611       }
5612       if( ! r[0].u[1] ) {
5613          mach->CondMask &= ~0x2;
5614       }
5615       if( ! r[0].u[2] ) {
5616          mach->CondMask &= ~0x4;
5617       }
5618       if( ! r[0].u[3] ) {
5619          mach->CondMask &= ~0x8;
5620       }
5621       UPDATE_EXEC_MASK(mach);
5622       /* Todo: If CondMask==0, jump to ELSE */
5623       break;
5624 
5625    case TGSI_OPCODE_ELSE:
5626       /* invert CondMask wrt previous mask */
5627       {
5628          uint prevMask;
5629          assert(mach->CondStackTop > 0);
5630          prevMask = mach->CondStack[mach->CondStackTop - 1];
5631          mach->CondMask = ~mach->CondMask & prevMask;
5632          UPDATE_EXEC_MASK(mach);
5633          /* Todo: If CondMask==0, jump to ENDIF */
5634       }
5635       break;
5636 
5637    case TGSI_OPCODE_ENDIF:
5638       /* pop CondMask */
5639       assert(mach->CondStackTop > 0);
5640       mach->CondMask = mach->CondStack[--mach->CondStackTop];
5641       UPDATE_EXEC_MASK(mach);
5642       break;
5643 
5644    case TGSI_OPCODE_END:
5645       /* make sure we end primitives which haven't
5646        * been explicitly emitted */
5647       conditional_emit_primitive(mach);
5648       /* halt execution */
5649       *pc = -1;
5650       break;
5651 
5652    case TGSI_OPCODE_CEIL:
5653       exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5654       break;
5655 
5656    case TGSI_OPCODE_I2F:
5657       exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
5658       break;
5659 
5660    case TGSI_OPCODE_NOT:
5661       exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5662       break;
5663 
5664    case TGSI_OPCODE_TRUNC:
5665       exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5666       break;
5667 
5668    case TGSI_OPCODE_SHL:
5669       exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5670       break;
5671 
5672    case TGSI_OPCODE_AND:
5673       exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5674       break;
5675 
5676    case TGSI_OPCODE_OR:
5677       exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5678       break;
5679 
5680    case TGSI_OPCODE_MOD:
5681       exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5682       break;
5683 
5684    case TGSI_OPCODE_XOR:
5685       exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5686       break;
5687 
5688    case TGSI_OPCODE_TXF:
5689       exec_txf(mach, inst);
5690       break;
5691 
5692    case TGSI_OPCODE_TXQ:
5693       exec_txq(mach, inst);
5694       break;
5695 
5696    case TGSI_OPCODE_EMIT:
5697       emit_vertex(mach, inst);
5698       break;
5699 
5700    case TGSI_OPCODE_ENDPRIM:
5701       emit_primitive(mach, inst);
5702       break;
5703 
5704    case TGSI_OPCODE_BGNLOOP:
5705       /* push LoopMask and ContMasks */
5706       assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5707       assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5708       assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5709       assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5710 
5711       mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5712       mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5713       mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5714       mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5715       mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
5716       break;
5717 
5718    case TGSI_OPCODE_ENDLOOP:
5719       /* Restore ContMask, but don't pop */
5720       assert(mach->ContStackTop > 0);
5721       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
5722       UPDATE_EXEC_MASK(mach);
5723       if (mach->ExecMask) {
5724          /* repeat loop: jump to instruction just past BGNLOOP */
5725          assert(mach->LoopLabelStackTop > 0);
5726          *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
5727       }
5728       else {
5729          /* exit loop: pop LoopMask */
5730          assert(mach->LoopStackTop > 0);
5731          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
5732          /* pop ContMask */
5733          assert(mach->ContStackTop > 0);
5734          mach->ContMask = mach->ContStack[--mach->ContStackTop];
5735          assert(mach->LoopLabelStackTop > 0);
5736          --mach->LoopLabelStackTop;
5737 
5738          mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
5739       }
5740       UPDATE_EXEC_MASK(mach);
5741       break;
5742 
5743    case TGSI_OPCODE_BRK:
5744       exec_break(mach);
5745       break;
5746 
5747    case TGSI_OPCODE_CONT:
5748       /* turn off cont channels for each enabled exec channel */
5749       mach->ContMask &= ~mach->ExecMask;
5750       /* Todo: if mach->LoopMask == 0, jump to end of loop */
5751       UPDATE_EXEC_MASK(mach);
5752       break;
5753 
5754    case TGSI_OPCODE_BGNSUB:
5755       /* no-op */
5756       break;
5757 
5758    case TGSI_OPCODE_ENDSUB:
5759       /*
5760        * XXX: This really should be a no-op. We should never reach this opcode.
5761        */
5762 
5763       assert(mach->CallStackTop > 0);
5764       mach->CallStackTop--;
5765 
5766       mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5767       mach->CondMask = mach->CondStack[mach->CondStackTop];
5768 
5769       mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5770       mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5771 
5772       mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5773       mach->ContMask = mach->ContStack[mach->ContStackTop];
5774 
5775       mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5776       mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5777 
5778       mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5779       mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5780 
5781       assert(mach->FuncStackTop > 0);
5782       mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5783 
5784       *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5785 
5786       UPDATE_EXEC_MASK(mach);
5787       break;
5788 
5789    case TGSI_OPCODE_NOP:
5790       break;
5791 
5792    case TGSI_OPCODE_F2I:
5793       exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5794       break;
5795 
5796    case TGSI_OPCODE_FSEQ:
5797       exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5798       break;
5799 
5800    case TGSI_OPCODE_FSGE:
5801       exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5802       break;
5803 
5804    case TGSI_OPCODE_FSLT:
5805       exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5806       break;
5807 
5808    case TGSI_OPCODE_FSNE:
5809       exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5810       break;
5811 
5812    case TGSI_OPCODE_IDIV:
5813       exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5814       break;
5815 
5816    case TGSI_OPCODE_IMAX:
5817       exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5818       break;
5819 
5820    case TGSI_OPCODE_IMIN:
5821       exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5822       break;
5823 
5824    case TGSI_OPCODE_INEG:
5825       exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5826       break;
5827 
5828    case TGSI_OPCODE_ISGE:
5829       exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5830       break;
5831 
5832    case TGSI_OPCODE_ISHR:
5833       exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5834       break;
5835 
5836    case TGSI_OPCODE_ISLT:
5837       exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5838       break;
5839 
5840    case TGSI_OPCODE_F2U:
5841       exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5842       break;
5843 
5844    case TGSI_OPCODE_U2F:
5845       exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
5846       break;
5847 
5848    case TGSI_OPCODE_UADD:
5849       exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5850       break;
5851 
5852    case TGSI_OPCODE_UDIV:
5853       exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5854       break;
5855 
5856    case TGSI_OPCODE_UMAD:
5857       exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5858       break;
5859 
5860    case TGSI_OPCODE_UMAX:
5861       exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5862       break;
5863 
5864    case TGSI_OPCODE_UMIN:
5865       exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5866       break;
5867 
5868    case TGSI_OPCODE_UMOD:
5869       exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5870       break;
5871 
5872    case TGSI_OPCODE_UMUL:
5873       exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5874       break;
5875 
5876    case TGSI_OPCODE_IMUL_HI:
5877       exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5878       break;
5879 
5880    case TGSI_OPCODE_UMUL_HI:
5881       exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5882       break;
5883 
5884    case TGSI_OPCODE_USEQ:
5885       exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5886       break;
5887 
5888    case TGSI_OPCODE_USGE:
5889       exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5890       break;
5891 
5892    case TGSI_OPCODE_USHR:
5893       exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5894       break;
5895 
5896    case TGSI_OPCODE_USLT:
5897       exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5898       break;
5899 
5900    case TGSI_OPCODE_USNE:
5901       exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5902       break;
5903 
5904    case TGSI_OPCODE_SWITCH:
5905       exec_switch(mach, inst);
5906       break;
5907 
5908    case TGSI_OPCODE_CASE:
5909       exec_case(mach, inst);
5910       break;
5911 
5912    case TGSI_OPCODE_DEFAULT:
5913       exec_default(mach);
5914       break;
5915 
5916    case TGSI_OPCODE_ENDSWITCH:
5917       exec_endswitch(mach);
5918       break;
5919 
5920    case TGSI_OPCODE_SAMPLE_I:
5921       exec_txf(mach, inst);
5922       break;
5923 
5924    case TGSI_OPCODE_SAMPLE_I_MS:
5925       exec_txf(mach, inst);
5926       break;
5927 
5928    case TGSI_OPCODE_SAMPLE:
5929       exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
5930       break;
5931 
5932    case TGSI_OPCODE_SAMPLE_B:
5933       exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
5934       break;
5935 
5936    case TGSI_OPCODE_SAMPLE_C:
5937       exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
5938       break;
5939 
5940    case TGSI_OPCODE_SAMPLE_C_LZ:
5941       exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
5942       break;
5943 
5944    case TGSI_OPCODE_SAMPLE_D:
5945       exec_sample_d(mach, inst);
5946       break;
5947 
5948    case TGSI_OPCODE_SAMPLE_L:
5949       exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
5950       break;
5951 
5952    case TGSI_OPCODE_GATHER4:
5953       exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE);
5954       break;
5955 
5956    case TGSI_OPCODE_SVIEWINFO:
5957       exec_txq(mach, inst);
5958       break;
5959 
5960    case TGSI_OPCODE_SAMPLE_POS:
5961       assert(0);
5962       break;
5963 
5964    case TGSI_OPCODE_SAMPLE_INFO:
5965       assert(0);
5966       break;
5967 
5968    case TGSI_OPCODE_LOD:
5969       exec_lodq(mach, inst);
5970       break;
5971 
5972    case TGSI_OPCODE_UARL:
5973       exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5974       break;
5975 
5976    case TGSI_OPCODE_UCMP:
5977       exec_ucmp(mach, inst);
5978       break;
5979 
5980    case TGSI_OPCODE_IABS:
5981       exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5982       break;
5983 
5984    case TGSI_OPCODE_ISSG:
5985       exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5986       break;
5987 
5988    case TGSI_OPCODE_TEX2:
5989       /* simple texture lookup */
5990       /* src[0] = texcoord */
5991       /* src[1] = compare */
5992       /* src[2] = sampler unit */
5993       exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
5994       break;
5995    case TGSI_OPCODE_TXB2:
5996       /* simple texture lookup */
5997       /* src[0] = texcoord */
5998       /* src[1] = bias */
5999       /* src[2] = sampler unit */
6000       exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
6001       break;
6002    case TGSI_OPCODE_TXL2:
6003       /* simple texture lookup */
6004       /* src[0] = texcoord */
6005       /* src[1] = lod */
6006       /* src[2] = sampler unit */
6007       exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
6008       break;
6009 
6010    case TGSI_OPCODE_IBFE:
6011       exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
6012       break;
6013    case TGSI_OPCODE_UBFE:
6014       exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6015       break;
6016    case TGSI_OPCODE_BFI:
6017       exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6018       break;
6019    case TGSI_OPCODE_BREV:
6020       exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6021       break;
6022    case TGSI_OPCODE_POPC:
6023       exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
6024       break;
6025    case TGSI_OPCODE_LSB:
6026       exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
6027       break;
6028    case TGSI_OPCODE_IMSB:
6029       exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
6030       break;
6031    case TGSI_OPCODE_UMSB:
6032       exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
6033       break;
6034 
6035    case TGSI_OPCODE_F2D:
6036       exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
6037       break;
6038 
6039    case TGSI_OPCODE_D2F:
6040       exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
6041       break;
6042 
6043    case TGSI_OPCODE_DABS:
6044       exec_double_unary(mach, inst, micro_dabs);
6045       break;
6046 
6047    case TGSI_OPCODE_DNEG:
6048       exec_double_unary(mach, inst, micro_dneg);
6049       break;
6050 
6051    case TGSI_OPCODE_DADD:
6052       exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
6053       break;
6054 
6055    case TGSI_OPCODE_DDIV:
6056       exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
6057       break;
6058 
6059    case TGSI_OPCODE_DMUL:
6060       exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
6061       break;
6062 
6063    case TGSI_OPCODE_DMAX:
6064       exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
6065       break;
6066 
6067    case TGSI_OPCODE_DMIN:
6068       exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
6069       break;
6070 
6071    case TGSI_OPCODE_DSLT:
6072       exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
6073       break;
6074 
6075    case TGSI_OPCODE_DSGE:
6076       exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
6077       break;
6078 
6079    case TGSI_OPCODE_DSEQ:
6080       exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
6081       break;
6082 
6083    case TGSI_OPCODE_DSNE:
6084       exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
6085       break;
6086 
6087    case TGSI_OPCODE_DRCP:
6088       exec_double_unary(mach, inst, micro_drcp);
6089       break;
6090 
6091    case TGSI_OPCODE_DSQRT:
6092       exec_double_unary(mach, inst, micro_dsqrt);
6093       break;
6094 
6095    case TGSI_OPCODE_DRSQ:
6096       exec_double_unary(mach, inst, micro_drsq);
6097       break;
6098 
6099    case TGSI_OPCODE_DMAD:
6100       exec_double_trinary(mach, inst, micro_dmad);
6101       break;
6102 
6103    case TGSI_OPCODE_DFRAC:
6104       exec_double_unary(mach, inst, micro_dfrac);
6105       break;
6106 
6107    case TGSI_OPCODE_DLDEXP:
6108       exec_dldexp(mach, inst);
6109       break;
6110 
6111    case TGSI_OPCODE_DFRACEXP:
6112       exec_dfracexp(mach, inst);
6113       break;
6114 
6115    case TGSI_OPCODE_I2D:
6116       exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
6117       break;
6118 
6119    case TGSI_OPCODE_D2I:
6120       exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
6121       break;
6122 
6123    case TGSI_OPCODE_U2D:
6124       exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
6125       break;
6126 
6127    case TGSI_OPCODE_D2U:
6128       exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT);
6129       break;
6130 
6131    case TGSI_OPCODE_LOAD:
6132       exec_load(mach, inst);
6133       break;
6134 
6135    case TGSI_OPCODE_STORE:
6136       exec_store(mach, inst);
6137       break;
6138 
6139    case TGSI_OPCODE_ATOMUADD:
6140    case TGSI_OPCODE_ATOMXCHG:
6141    case TGSI_OPCODE_ATOMCAS:
6142    case TGSI_OPCODE_ATOMAND:
6143    case TGSI_OPCODE_ATOMOR:
6144    case TGSI_OPCODE_ATOMXOR:
6145    case TGSI_OPCODE_ATOMUMIN:
6146    case TGSI_OPCODE_ATOMUMAX:
6147    case TGSI_OPCODE_ATOMIMIN:
6148    case TGSI_OPCODE_ATOMIMAX:
6149    case TGSI_OPCODE_ATOMFADD:
6150       exec_atomop(mach, inst);
6151       break;
6152 
6153    case TGSI_OPCODE_RESQ:
6154       exec_resq(mach, inst);
6155       break;
6156    case TGSI_OPCODE_BARRIER:
6157    case TGSI_OPCODE_MEMBAR:
6158       return TRUE;
6159       break;
6160 
6161    case TGSI_OPCODE_I64ABS:
6162       exec_double_unary(mach, inst, micro_i64abs);
6163       break;
6164 
6165    case TGSI_OPCODE_I64SSG:
6166       exec_double_unary(mach, inst, micro_i64sgn);
6167       break;
6168 
6169    case TGSI_OPCODE_I64NEG:
6170       exec_double_unary(mach, inst, micro_i64neg);
6171       break;
6172 
6173    case TGSI_OPCODE_U64SEQ:
6174       exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
6175       break;
6176 
6177    case TGSI_OPCODE_U64SNE:
6178       exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
6179       break;
6180 
6181    case TGSI_OPCODE_I64SLT:
6182       exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
6183       break;
6184    case TGSI_OPCODE_U64SLT:
6185       exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
6186       break;
6187 
6188    case TGSI_OPCODE_I64SGE:
6189       exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
6190       break;
6191    case TGSI_OPCODE_U64SGE:
6192       exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
6193       break;
6194 
6195    case TGSI_OPCODE_I64MIN:
6196       exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
6197       break;
6198    case TGSI_OPCODE_U64MIN:
6199       exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
6200       break;
6201    case TGSI_OPCODE_I64MAX:
6202       exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
6203       break;
6204    case TGSI_OPCODE_U64MAX:
6205       exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
6206       break;
6207    case TGSI_OPCODE_U64ADD:
6208       exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
6209       break;
6210    case TGSI_OPCODE_U64MUL:
6211       exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
6212       break;
6213    case TGSI_OPCODE_U64SHL:
6214       exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
6215       break;
6216    case TGSI_OPCODE_I64SHR:
6217       exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
6218       break;
6219    case TGSI_OPCODE_U64SHR:
6220       exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
6221       break;
6222    case TGSI_OPCODE_U64DIV:
6223       exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
6224       break;
6225    case TGSI_OPCODE_I64DIV:
6226       exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
6227       break;
6228    case TGSI_OPCODE_U64MOD:
6229       exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
6230       break;
6231    case TGSI_OPCODE_I64MOD:
6232       exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
6233       break;
6234 
6235    case TGSI_OPCODE_F2U64:
6236       exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
6237       break;
6238 
6239    case TGSI_OPCODE_F2I64:
6240       exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
6241       break;
6242 
6243    case TGSI_OPCODE_U2I64:
6244       exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
6245       break;
6246    case TGSI_OPCODE_I2I64:
6247       exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
6248       break;
6249 
6250    case TGSI_OPCODE_D2U64:
6251       exec_double_unary(mach, inst, micro_d2u64);
6252       break;
6253 
6254    case TGSI_OPCODE_D2I64:
6255       exec_double_unary(mach, inst, micro_d2i64);
6256       break;
6257 
6258    case TGSI_OPCODE_U642F:
6259       exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
6260       break;
6261    case TGSI_OPCODE_I642F:
6262       exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
6263       break;
6264 
6265    case TGSI_OPCODE_U642D:
6266       exec_double_unary(mach, inst, micro_u642d);
6267       break;
6268    case TGSI_OPCODE_I642D:
6269       exec_double_unary(mach, inst, micro_i642d);
6270       break;
6271    case TGSI_OPCODE_INTERP_SAMPLE:
6272       exec_interp_at_sample(mach, inst);
6273       break;
6274    case TGSI_OPCODE_INTERP_OFFSET:
6275       exec_interp_at_offset(mach, inst);
6276       break;
6277    case TGSI_OPCODE_INTERP_CENTROID:
6278       exec_interp_at_centroid(mach, inst);
6279       break;
6280    default:
6281       assert( 0 );
6282    }
6283    return FALSE;
6284 }
6285 
6286 static void
tgsi_exec_machine_setup_masks(struct tgsi_exec_machine * mach)6287 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
6288 {
6289    uint default_mask = 0xf;
6290 
6291    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
6292    mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
6293 
6294    if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
6295       for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
6296          mach->Temps[temp_prim_idxs[i].idx].xyzw[temp_prim_idxs[i].chan].u[0] = 0;
6297          mach->Primitives[i][0] = 0;
6298       }
6299       /* GS runs on a single primitive for now */
6300       default_mask = 0x1;
6301    }
6302 
6303    if (mach->NonHelperMask == 0)
6304       mach->NonHelperMask = default_mask;
6305    mach->CondMask = default_mask;
6306    mach->LoopMask = default_mask;
6307    mach->ContMask = default_mask;
6308    mach->FuncMask = default_mask;
6309    mach->ExecMask = default_mask;
6310 
6311    mach->Switch.mask = default_mask;
6312 
6313    assert(mach->CondStackTop == 0);
6314    assert(mach->LoopStackTop == 0);
6315    assert(mach->ContStackTop == 0);
6316    assert(mach->SwitchStackTop == 0);
6317    assert(mach->BreakStackTop == 0);
6318    assert(mach->CallStackTop == 0);
6319 }
6320 
6321 /**
6322  * Run TGSI interpreter.
6323  * \return bitmask of "alive" quad components
6324  */
6325 uint
tgsi_exec_machine_run(struct tgsi_exec_machine * mach,int start_pc)6326 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
6327 {
6328    uint i;
6329 
6330    mach->pc = start_pc;
6331 
6332    if (!start_pc) {
6333       tgsi_exec_machine_setup_masks(mach);
6334 
6335       /* execute declarations (interpolants) */
6336       for (i = 0; i < mach->NumDeclarations; i++) {
6337          exec_declaration( mach, mach->Declarations+i );
6338       }
6339    }
6340 
6341    {
6342 #if DEBUG_EXECUTION
6343       struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
6344       struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6345       uint inst = 1;
6346 
6347       if (!start_pc) {
6348          memset(mach->Temps, 0, sizeof(temps));
6349          if (mach->Outputs)
6350             memset(mach->Outputs, 0, sizeof(outputs));
6351          memset(temps, 0, sizeof(temps));
6352          memset(outputs, 0, sizeof(outputs));
6353       }
6354 #endif
6355 
6356       /* execute instructions, until pc is set to -1 */
6357       while (mach->pc != -1) {
6358          boolean barrier_hit;
6359 #if DEBUG_EXECUTION
6360          uint i;
6361 
6362          tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6363 #endif
6364 
6365          assert(mach->pc < (int) mach->NumInstructions);
6366          barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
6367 
6368          /* for compute shaders if we hit a barrier return now for later rescheduling */
6369          if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
6370             return 0;
6371 
6372 #if DEBUG_EXECUTION
6373          for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
6374             if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6375                uint j;
6376 
6377                memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6378                debug_printf("TEMP[%2u] = ", i);
6379                for (j = 0; j < 4; j++) {
6380                   if (j > 0) {
6381                      debug_printf("           ");
6382                   }
6383                   debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6384                                temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6385                                temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6386                                temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6387                                temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6388                }
6389             }
6390          }
6391          if (mach->Outputs) {
6392             for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
6393                if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
6394                   uint j;
6395 
6396                   memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
6397                   debug_printf("OUT[%2u] =  ", i);
6398                   for (j = 0; j < 4; j++) {
6399                      if (j > 0) {
6400                         debug_printf("           ");
6401                      }
6402                      debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6403                                   outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
6404                                   outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
6405                                   outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
6406                                   outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6407                   }
6408                }
6409             }
6410          }
6411 #endif
6412       }
6413    }
6414 
6415 #if 0
6416    /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6417    if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
6418       /*
6419        * Scale back depth component.
6420        */
6421       for (i = 0; i < 4; i++)
6422          mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
6423    }
6424 #endif
6425 
6426    /* Strictly speaking, these assertions aren't really needed but they
6427     * can potentially catch some bugs in the control flow code.
6428     */
6429    assert(mach->CondStackTop == 0);
6430    assert(mach->LoopStackTop == 0);
6431    assert(mach->ContStackTop == 0);
6432    assert(mach->SwitchStackTop == 0);
6433    assert(mach->BreakStackTop == 0);
6434    assert(mach->CallStackTop == 0);
6435 
6436    return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
6437 }
6438