1 /**
2  * MojoShader; generate shader programs from bytecode of compiled
3  *  Direct3D shaders.
4  *
5  * Please see the file LICENSE.txt in the source's root directory.
6  *
7  *  This file written by Ryan C. Gordon.
8  */
9 
10 // !!! FIXME: this file really needs to be split up.
11 // !!! FIXME: I keep changing coding styles for symbols and typedefs.
12 
13 // !!! FIXME: rules from MSDN about temp registers we probably don't check.
14 // - There are limited temporaries: vs_1_1 has 12 (ps_1_1 has _2_!).
15 // - SM2 apparently was variable, between 12 and 32. Shader Model 3 has 32.
16 // - A maximum of three temp registers can be used in a single instruction.
17 
18 #define __MOJOSHADER_INTERNAL__ 1
19 #include "mojoshader_internal.h"
20 
// Singly-linked list node that carries one MOJOSHADER_constant.
typedef struct ConstantsList
{
    MOJOSHADER_constant constant;  // the constant stored in this node.
    struct ConstantsList *next;    // link to the following node.
} ConstantsList;
26 
// Singly-linked list node describing one variable. The Context keeps a list
//  of these as its "variables to register mapping".
typedef struct VariableList
{
    MOJOSHADER_uniformType type;
    int index;  // NOTE(review): presumably the first register covered; confirm.
    int count;  // NOTE(review): presumably how many registers are covered; confirm.
    ConstantsList *constant;
    int used;
    int emit_position;  // used in some profiles.
    struct VariableList *next;  // link to the following node.
} VariableList;
37 
// Node of a singly-linked register list. reglist_insert() keeps each list
//  sorted in ascending order of reg_to_ui32(regtype, regnum). The list head
//  is itself a RegisterList (embedded in Context) whose "next" field points
//  at the first real node.
typedef struct RegisterList
{
    RegisterType regtype;
    int regnum;
    MOJOSHADER_usage usage;  // set by add_attribute_register().
    unsigned int index;  // attribute index, or the texture type for samplers.
    int writemask;  // set by add_attribute_register().
    int misc;  // attribute flags, or accumulated texbem bits for samplers.
    int written;  // set to 1 by set_used_register() when asked to.
    const VariableList *array;
    struct RegisterList *next;  // link to the following node.
} RegisterList;
50 
// Decoded form of one source argument of an instruction.
typedef struct
{
    const uint32 *token;   // this is the unmolested token in the stream.
    int regnum;
    int swizzle;  // xyzw (all four, not split out).
    int swizzle_x;  // NOTE(review): presumably the per-channel pieces of
    int swizzle_y;  //  "swizzle", split out; the code that fills these in
    int swizzle_z;  //  is not visible here, so confirm.
    int swizzle_w;
    SourceMod src_mod;  // source modifier (negate, bias, etc.)
    RegisterType regtype;
    int relative;  // NOTE(review): presumably non-zero for relative addressing.
    RegisterType relative_regtype;
    int relative_regnum;
    int relative_component;
    const VariableList *relative_array;
} SourceArgInfo;
68 
struct Profile;  // predeclare; Context holds a pointer to it before it is defined.
70 
// Symbol information attached to the Context as its "ctab" member.
// NOTE(review): presumably filled in from the shader's CTAB (constant table)
//  section; the parsing code is not visible here.
typedef struct CtabData
{
    int have_ctab;
    int symbol_count;
    MOJOSHADER_symbol *symbols;
} CtabData;
77 
// Context...this is state that changes as we parse through a shader...
//  One of these is threaded through every helper in this file.
typedef struct Context
{
    // Failure state. failf() sets isfail; out_of_memory() sets both.
    int isfail;
    int out_of_memory;

    // Allocator callbacks and their opaque argument; see Malloc() and Free().
    MOJOSHADER_malloc malloc;
    MOJOSHADER_free free;
    void *malloc_data;

    // Token stream cursor. adjust_token_position() moves "tokens" and
    //  "tokencount" in tokens, and "current_position" in bytes; failf()
    //  reports current_position as the error position.
    int current_position;
    const uint32 *orig_tokens;
    const uint32 *tokens;
    uint32 tokencount;

    const MOJOSHADER_swizzle *swizzles;
    unsigned int swizzles_count;
    const MOJOSHADER_samplerMap *samplermap;  // consulted by add_sampler().
    unsigned int samplermap_count;

    // "output" is the section output_line() currently appends to. The other
    //  buffers are sections, created on first use by set_output().
    Buffer *output;
    Buffer *preflight;
    Buffer *globals;
    Buffer *helpers;
    Buffer *subroutines;
    Buffer *mainline_intro;
    Buffer *mainline;
    Buffer *ignore;

    // Saved output/indent pairs for push_output() and pop_output().
    Buffer *output_stack[2];
    int indent_stack[2];
    int output_stack_len;
    int indent;  // number of tab characters output_line() prepends.

    const char *shader_type_str;
    const char *endline;  // appended after every emitted line.
    int endline_len;
    int profileid;
    const struct Profile *profile;
    MOJOSHADER_shaderType shader_type;
    uint8 major_ver;
    uint8 minor_ver;
    DestArgInfo dest_arg;
    SourceArgInfo source_args[5];
    SourceArgInfo predicate_arg;  // for predicated instructions.
    uint32 dwords[4];
    uint32 version_token;
    int instruction_count;
    uint32 instruction_controls;
    uint32 previous_opcode;
    int coissue;
    int loops;
    int reps;
    int max_reps;
    int cmps;
    int scratch_registers;      // next index from allocate_scratch_register().
    int max_scratch_registers;  // high-water mark of the above.
    int branch_labels_stack_index;
    int branch_labels_stack[32];
    int assigned_branch_labels;  // next id from allocate_branch_label().
    int assigned_vertex_attributes;
    int last_address_reg_component;

    // Heads of sorted register lists; see reglist_insert().
    RegisterList used_registers;
    RegisterList defined_registers;
    ErrorList *errors;  // failf() appends here.
    int constant_count;
    ConstantsList *constants;
    int uniform_count;
    int uniform_float4_count;
    int uniform_int4_count;
    int uniform_bool_count;
    RegisterList uniforms;
    int attribute_count;
    RegisterList attributes;
    int sampler_count;
    RegisterList samplers;
    VariableList *variables;  // variables to register mapping.
    int centroid_allowed;
    CtabData ctab;
    int have_relative_input_registers;
    int have_multi_color_outputs;  // set by set_used_register().
    int determined_constants_arrays;
    int predicated;
    int uses_pointsize;  // set by add_attribute_register().
    int uses_fog;        // set by add_attribute_register().
    int glsl_generated_lit_helper;
    int glsl_generated_texldd_setup;
    int glsl_generated_texm3x3spec_helper;
    int arb1_wrote_position;
    int have_preshader;
    int ignores_ctab;
    int reset_texmpad;
    int texm3x2pad_dst0;
    int texm3x2pad_src0;
    int texm3x3pad_dst0;
    int texm3x3pad_src0;
    int texm3x3pad_dst1;
    int texm3x3pad_src1;
    MOJOSHADER_preshader *preshader;

    // These flags only exist when the matching profile is compiled in;
    //  read them through the support_*() macros.
#if SUPPORT_PROFILE_ARB1_NV
    int profile_supports_nv2;
    int profile_supports_nv3;
    int profile_supports_nv4;
#endif
#if SUPPORT_PROFILE_GLSL120
    int profile_supports_glsl120;
#endif
} Context;
181 
182 
// Use these macros so we can remove all bits of these profiles from the build.
//  When a profile is compiled in, the macro reads the matching flag out of
//  the Context; when it is compiled out, the macro is the constant 0 (and
//  the Context field does not exist at all).
#if SUPPORT_PROFILE_ARB1_NV
#define support_nv2(ctx) ((ctx)->profile_supports_nv2)
#define support_nv3(ctx) ((ctx)->profile_supports_nv3)
#define support_nv4(ctx) ((ctx)->profile_supports_nv4)
#else
#define support_nv2(ctx) (0)
#define support_nv3(ctx) (0)
#define support_nv4(ctx) (0)
#endif

// Same idea for the GLSL 1.20 flag.
#if SUPPORT_PROFILE_GLSL120
#define support_glsl120(ctx) ((ctx)->profile_supports_glsl120)
#else
#define support_glsl120(ctx) (0)
#endif
199 
200 
201 // Profile entry points...
202 
// Function pointer types for the per-profile callbacks. Every callback
//  receives the Context as its first argument.

// one emit function for each opcode in each profile.
typedef void (*emit_function)(Context *ctx);

// one emit function for starting output in each profile.
//  (profilestr) is the profile name string.
typedef void (*emit_start)(Context *ctx, const char *profilestr);

// one emit function for ending output in each profile.
typedef void (*emit_end)(Context *ctx);

// one emit function for phase opcode output in each profile.
typedef void (*emit_phase)(Context *ctx);

// one emit function for finalizing output in each profile.
typedef void (*emit_finalize)(Context *ctx);

// one emit function for global definitions in each profile.
typedef void (*emit_global)(Context *ctx, RegisterType regtype, int regnum);

// one emit function for relative uniform arrays in each profile.
typedef void (*emit_array)(Context *ctx, VariableList *var);

// one emit function for relative constants arrays in each profile.
typedef void (*emit_const_array)(Context *ctx,
                                 const struct ConstantsList *constslist,
                                 int base, int size);

// one emit function for uniforms in each profile.
typedef void (*emit_uniform)(Context *ctx, RegisterType regtype, int regnum,
                             const VariableList *var);

// one emit function for samplers in each profile.
typedef void (*emit_sampler)(Context *ctx, int stage, TextureType ttype,
                             int texbem);

// one emit function for attributes in each profile.
typedef void (*emit_attribute)(Context *ctx, RegisterType regtype, int regnum,
                               MOJOSHADER_usage usage, int index, int wmask,
                               int flags);

// one args function for each possible sequence of opcode arguments.
typedef int (*args_function)(Context *ctx);

// one state function for each opcode where we have state machine updates.
typedef void (*state_function)(Context *ctx);

// one function for varnames in each profile.
typedef const char *(*varname_function)(Context *c, RegisterType t, int num);

// one function for const var array in each profile.
typedef const char *(*const_array_varname_function)(Context *c, int base, int size);
253 
// The set of callbacks (plus a name) that describes one output profile.
//  Context::profile points at one of these.
typedef struct Profile
{
    const char *name;
    emit_start start_emitter;
    emit_end end_emitter;
    emit_phase phase_emitter;
    emit_global global_emitter;
    emit_array array_emitter;
    emit_const_array const_array_emitter;
    emit_uniform uniform_emitter;
    emit_sampler sampler_emitter;
    emit_attribute attribute_emitter;
    emit_finalize finalize_emitter;
    varname_function get_varname;
    const_array_varname_function get_const_array_varname;
} Profile;
270 
271 
272 // Convenience functions for allocators...
// Default allocator hooks built on the C runtime's malloc() and free().
//  They are compiled out when MOJOSHADER_FORCE_ALLOCATOR is set. The (d)
//  argument only exists to match the callback signature and is not used.
#if !MOJOSHADER_FORCE_ALLOCATOR
void *MOJOSHADER_internal_malloc(int bytes, void *d)
{
    void *retval = malloc(bytes);
    return retval;
} // MOJOSHADER_internal_malloc

void MOJOSHADER_internal_free(void *ptr, void *d)
{
    free(ptr);
} // MOJOSHADER_internal_free
#endif
277 
// Statically allocated "Out of memory" error record, so that reporting an
//  allocation failure never needs to allocate. It has no second field value
//  (NULL) and no position (MOJOSHADER_POSITION_NONE).
MOJOSHADER_error MOJOSHADER_out_of_mem_error = {
    "Out of memory", NULL, MOJOSHADER_POSITION_NONE
};
281 
// Statically allocated parse result that refers to the static out-of-memory
//  error above and reports an unknown shader type.
// NOTE(review): the leading 1 is presumably the error count; confirm against
//  the MOJOSHADER_parseData declaration.
MOJOSHADER_parseData MOJOSHADER_out_of_mem_data = {
    1, &MOJOSHADER_out_of_mem_error, 0, 0, 0, 0,
    MOJOSHADER_TYPE_UNKNOWN, 0, 0, 0, 0
};
286 
287 
288 // !!! FIXME: cut and paste between every damned source file follows...
289 // !!! FIXME: We need to make some sort of ContextBase that applies to all
290 // !!! FIXME:  files and move this stuff to mojoshader_common.c ...
291 
// Flag the context as failed because an allocation could not be satisfied.
static inline void out_of_memory(Context *ctx)
{
    ctx->out_of_memory = 1;
    ctx->isfail = 1;
} // out_of_memory
296 
// Allocate (len) bytes through the context's allocator callback.
//  Returns the new block, or NULL on failure; on failure the context is also
//  flagged as out of memory.
static inline void *Malloc(Context *ctx, const size_t len)
{
    // The callback takes its size as an int. A request that does not fit
    //  would be truncated by the cast and silently under-allocated, so treat
    //  it as an allocation failure instead.
    const size_t maxlen = (size_t) (((unsigned int) ~0u) >> 1);
    void *retval = NULL;

    if (len <= maxlen)
        retval = ctx->malloc((int) len, ctx->malloc_data);

    if (retval == NULL)
        out_of_memory(ctx);
    return retval;
} // Malloc
304 
// Duplicate a NUL-terminated string into memory obtained from Malloc().
//  Returns NULL if the allocation fails (Malloc() flags the context).
static inline char *StrDup(Context *ctx, const char *str)
{
    const size_t bytes = strlen(str) + 1;  // include the terminator.
    char *copy = (char *) Malloc(ctx, bytes);
    if (copy == NULL)
        return NULL;

    memcpy(copy, str, bytes);
    return copy;
} // StrDup
312 
// Release (ptr) through the context's free callback.
static inline void Free(Context *ctx, void *ptr)
{
    void *userdata = ctx->malloc_data;
    ctx->free(ptr, userdata);
} // Free
317 
MallocBridge(int bytes,void * data)318 static void *MallocBridge(int bytes, void *data)
319 {
320     return Malloc((Context *) data, (size_t) bytes);
321 } // MallocBridge
322 
FreeBridge(void * ptr,void * data)323 static void FreeBridge(void *ptr, void *data)
324 {
325     Free((Context *) data, ptr);
326 } // FreeBridge
327 
328 
329 // jump between output sections in the context...
330 
// Make (*section) the current output buffer, creating it the first time it
//  is selected. Returns 1 on success, or 0 if the buffer could not be
//  created (the current output is left alone in that case).
static int set_output(Context *ctx, Buffer **section)
{
    Buffer *buf = *section;
    if (buf == NULL)  // first use of this section: create it now.
    {
        buf = buffer_create(256, MallocBridge, FreeBridge, ctx);
        *section = buf;
        if (buf == NULL)
            return 0;
    } // if

    ctx->output = buf;
    return 1;
} // set_output
344 
// Save the current output buffer and indent level on the output stack, then
//  switch to (*section). The indent is reset to zero only if the switch
//  worked; the saved entry is pushed either way.
static void push_output(Context *ctx, Buffer **section)
{
    const int slot = ctx->output_stack_len;
    assert(ctx->output_stack_len < (int) (STATICARRAYLEN(ctx->output_stack)));
    ctx->output_stack[slot] = ctx->output;
    ctx->indent_stack[slot] = ctx->indent;
    ctx->output_stack_len = slot + 1;

    if (set_output(ctx, section))
        ctx->indent = 0;
} // push_output
355 
// Restore the output buffer and indent level most recently saved by
//  push_output().
static inline void pop_output(Context *ctx)
{
    assert(ctx->output_stack_len > 0);
    const int slot = ctx->output_stack_len - 1;
    ctx->output_stack_len = slot;
    ctx->indent = ctx->indent_stack[slot];
    ctx->output = ctx->output_stack[slot];
} // pop_output
363 
364 
365 
366 // Shader model version magic...
367 
ver_ui32(const uint8 major,const uint8 minor)368 static inline uint32 ver_ui32(const uint8 major, const uint8 minor)
369 {
370     return ( (((uint32) major) << 16) | (((minor) == 0xFF) ? 1 : (minor)) );
371 } // version_ui32
372 
shader_version_supported(const uint8 maj,const uint8 min)373 static inline int shader_version_supported(const uint8 maj, const uint8 min)
374 {
375     return (ver_ui32(maj,min) <= ver_ui32(MAX_SHADER_MAJOR, MAX_SHADER_MINOR));
376 } // shader_version_supported
377 
shader_version_atleast(const Context * ctx,const uint8 maj,const uint8 min)378 static inline int shader_version_atleast(const Context *ctx, const uint8 maj,
379                                          const uint8 min)
380 {
381     return (ver_ui32(ctx->major_ver, ctx->minor_ver) >= ver_ui32(maj, min));
382 } // shader_version_atleast
383 
shader_version_exactly(const Context * ctx,const uint8 maj,const uint8 min)384 static inline int shader_version_exactly(const Context *ctx, const uint8 maj,
385                                          const uint8 min)
386 {
387     return ((ctx->major_ver == maj) && (ctx->minor_ver == min));
388 } // shader_version_exactly
389 
shader_is_pixel(const Context * ctx)390 static inline int shader_is_pixel(const Context *ctx)
391 {
392     return (ctx->shader_type == MOJOSHADER_TYPE_PIXEL);
393 } // shader_is_pixel
394 
shader_is_vertex(const Context * ctx)395 static inline int shader_is_vertex(const Context *ctx)
396 {
397     return (ctx->shader_type == MOJOSHADER_TYPE_VERTEX);
398 } // shader_is_vertex
399 
400 
isfail(const Context * ctx)401 static inline int isfail(const Context *ctx)
402 {
403     return ctx->isfail;
404 } // isfail
405 
406 
// Mark the context as failed and record a printf-style error message at the
//  current position. The message is skipped if we are already out of
//  memory, but the failure flag is set either way.
static void failf(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
static void failf(Context *ctx, const char *fmt, ...)
{
    va_list ap;

    ctx->isfail = 1;
    if (!ctx->out_of_memory)
    {
        // no filename at this level (we pass a NULL to errorlist_add_va()...)
        va_start(ap, fmt);
        errorlist_add_va(ctx->errors, NULL, ctx->current_position, fmt, ap);
        va_end(ap);
    } // if
} // failf
420 
421 
// Record a fixed error message. (reason) goes through "%s", so it is never
//  interpreted as a format string itself.
static inline void fail(Context *ctx, const char *reason)
{
    const char *message = reason;
    failf(ctx, "%s", message);
} // fail
426 
427 
// Append one printf-formatted line to the current output buffer: first
//  (ctx->indent) tab characters, then the formatted text, then the context's
//  end-of-line sequence. Does nothing once the context has failed.
static void output_line(Context *ctx, const char *fmt, ...) ISPRINTF(2,3);
static void output_line(Context *ctx, const char *fmt, ...)
{
    va_list ap;

    assert(ctx->output != NULL);
    if (isfail(ctx))
        return;  // we failed previously, don't go on...

    if (ctx->indent > 0)
    {
        const int tabs = ctx->indent;
        char *pad = (char *) alloca(tabs);  // scratch space for the tabs.
        memset(pad, '\t', tabs);
        buffer_append(ctx->output, pad, tabs);
    } // if

    va_start(ap, fmt);
    buffer_append_va(ctx->output, fmt, ap);
    va_end(ap);

    buffer_append(ctx->output, ctx->endline, ctx->endline_len);
} // output_line
450 
451 
// Append just the end-of-line sequence to the current output buffer.
//  Does nothing once the context has failed.
static inline void output_blank_line(Context *ctx)
{
    assert(ctx->output != NULL);
    if (isfail(ctx))
        return;

    buffer_append(ctx->output, ctx->endline, ctx->endline_len);
} // output_blank_line
458 
459 
460 // !!! FIXME: this is sort of nasty.
// Format (f) into (buf) with "%f", then trim trailing zeros from the
//  fractional part.
//
//  - If only zeros follow the decimal point, the whole fraction is removed,
//    unless (leavedecimal) is non-zero, in which case ".0" is kept.
//  - If the formatted text has no '.' at all, ".0" is appended when
//    (leavedecimal) is non-zero.
//
// (bufsize) must leave room for the text plus two extra characters;
//  otherwise this fails the context with a "BUG" message.
static void floatstr(Context *ctx, char *buf, size_t bufsize, float f,
                     int leavedecimal)
{
    // snprintf() returns an int and may return a negative value. Check it
    //  before converting to size_t, or the negative value wraps around and
    //  slips past the size check below.
    const int rc = snprintf(buf, bufsize, "%f", f);
    const size_t len = (rc < 0) ? 0 : (size_t) rc;

    if (rc < 0)
    {
        if (bufsize > 0)
            buf[0] = '\0';  // don't leave indeterminate contents behind.
        fail(ctx, "BUG: internal buffer is too small");
        return;
    } // if

    if ((len+2) >= bufsize)
    {
        fail(ctx, "BUG: internal buffer is too small");
        return;
    } // if

    char *end = buf + len;
    char *ptr = strchr(buf, '.');
    if (ptr == NULL)
    {
        if (leavedecimal)
            strcat(buf, ".0");  // room was checked above (len+2).
        return;  // done.
    } // if

    // Walk backwards from the terminator to the last non-zero digit.
    while (--end != ptr)
    {
        if (*end != '0')
        {
            end++;  // keep this digit; cut just after it.
            break;
        } // if
    } // while

    // (end == ptr) means everything after the '.' was zeros.
    if ((leavedecimal) && (end == ptr))
        end += 2;  // keep ".0"
    *end = '\0';  // chop extra '0' or all decimal places off.
} // floatstr
491 
cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type)492 static inline TextureType cvtMojoToD3DSamplerType(const MOJOSHADER_samplerType type)
493 {
494     return (TextureType) (((int) type) + 2);
495 } // cvtMojoToD3DSamplerType
496 
cvtD3DToMojoSamplerType(const TextureType type)497 static inline MOJOSHADER_samplerType cvtD3DToMojoSamplerType(const TextureType type)
498 {
499     return (MOJOSHADER_samplerType) (((int) type) - 2);
500 } // cvtD3DToMojoSamplerType
501 
502 
503 // Deal with register lists...  !!! FIXME: I sort of hate this.
504 
// Free every node of a register list, starting at (item), with the supplied
//  free callback (f) and its opaque argument (d). A NULL (item) is a no-op.
static void free_reglist(MOJOSHADER_free f, void *d, RegisterList *item)
{
    RegisterList *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // grab the link before the node goes away.
        f(item, d);
    } // for
} // free_reglist
514 
reg_to_ui32(const RegisterType regtype,const int regnum)515 static inline uint32 reg_to_ui32(const RegisterType regtype, const int regnum)
516 {
517     return ( ((uint32) regtype) | (((uint32) regnum) << 16) );
518 } // reg_to_uint32
519 
520 // !!! FIXME: ditch this for a hash table.
// Find the node for (regtype, regnum) in the sorted list headed by (prev),
//  creating and linking a new one if it is not there yet.
//
// The list is kept in ascending order of reg_to_ui32(regtype, regnum).
//  Returns the existing or new node, or NULL if a node had to be allocated
//  and the allocation failed (Malloc() flags the context in that case).
//  A new node starts with every field other than the type and number cleared.
static RegisterList *reglist_insert(Context *ctx, RegisterList *prev,
                                    const RegisterType regtype,
                                    const int regnum)
{
    const uint32 newval = reg_to_ui32(regtype, regnum);
    RegisterList *item = prev->next;
    while (item != NULL)
    {
        const uint32 val = reg_to_ui32(item->regtype, item->regnum);
        if (newval == val)
            return item;  // already set, so we're done.
        else if (newval < val)  // insert it here.
            break;
        else // if (newval > val)
        {
            // keep going, we're not to the insertion point yet.
            prev = item;
            item = item->next;
        } // else
    } // while

    // we need to insert an entry after (prev).
    item = (RegisterList *) Malloc(ctx, sizeof (RegisterList));
    if (item != NULL)
    {
        item->regtype = regtype;
        item->regnum = regnum;
        item->usage = MOJOSHADER_USAGE_UNKNOWN;
        item->index = 0;
        item->writemask = 0;
        item->misc = 0;
        // Must be cleared: register_was_written() reads this field, and
        //  set_used_register() only ever sets it to 1.
        item->written = 0;
        item->array = NULL;
        item->next = prev->next;
        prev->next = item;
    } // if

    return item;
} // reglist_insert
559 
reglist_find(const RegisterList * prev,const RegisterType rtype,const int regnum)560 static RegisterList *reglist_find(const RegisterList *prev,
561                                   const RegisterType rtype, const int regnum)
562 {
563     const uint32 newval = reg_to_ui32(rtype, regnum);
564     RegisterList *item = prev->next;
565     while (item != NULL)
566     {
567         const uint32 val = reg_to_ui32(item->regtype, item->regnum);
568         if (newval == val)
569             return item;  // here it is.
570         else if (newval < val)  // should have been here if it existed.
571             return NULL;
572         else // if (newval > val)
573             item = item->next;
574     } // while
575 
576     return NULL;  // wasn't in the list.
577 } // reglist_find
578 
// Read-only lookup: same as reglist_find(), but hands back a const node.
static inline const RegisterList *reglist_exists(RegisterList *prev,
                                                 const RegisterType regtype,
                                                 const int regnum)
{
    const RegisterList *found = reglist_find(prev, regtype, regnum);
    return found;
} // reglist_exists
585 
// Returns 1 if the register is in the used-registers list and has been
//  marked as written, 0 otherwise.
static inline int register_was_written(Context *ctx, const RegisterType rtype,
                                       const int regnum)
{
    const RegisterList *reg = reglist_find(&ctx->used_registers, rtype, regnum);
    if (reg == NULL)
        return 0;
    return (reg->written != 0);
} // register_was_written
592 
set_used_register(Context * ctx,const RegisterType regtype,const int regnum,const int written)593 static inline RegisterList *set_used_register(Context *ctx,
594                                               const RegisterType regtype,
595                                               const int regnum,
596                                               const int written)
597 {
598     RegisterList *reg = NULL;
599     if ((regtype == REG_TYPE_COLOROUT) && (regnum > 0))
600         ctx->have_multi_color_outputs = 1;
601 
602     reg = reglist_insert(ctx, &ctx->used_registers, regtype, regnum);
603     if (reg && written)
604         reg->written = 1;
605     return reg;
606 } // set_used_register
607 
// Returns 1 if the register is in the used-registers list, 0 otherwise.
static inline int get_used_register(Context *ctx, const RegisterType regtype,
                                    const int regnum)
{
    const RegisterList *reg;
    reg = reglist_exists(&ctx->used_registers, regtype, regnum);
    return (reg != NULL);
} // get_used_register
613 
// Record that a register has been defined. The node returned by the insert
//  is not needed here, so an allocation failure only shows up through the
//  context's failure flags.
static inline void set_defined_register(Context *ctx, const RegisterType rtype,
                                        const int regnum)
{
    RegisterList *head = &ctx->defined_registers;
    reglist_insert(ctx, head, rtype, regnum);
} // set_defined_register
619 
// Returns 1 if the register is in the defined-registers list, 0 otherwise.
static inline int get_defined_register(Context *ctx, const RegisterType rtype,
                                       const int regnum)
{
    const RegisterList *reg;
    reg = reglist_exists(&ctx->defined_registers, rtype, regnum);
    return (reg != NULL);
} // get_defined_register
625 
// Add (or update) an attribute register entry and record its usage, index,
//  writemask and flags. An output register whose usage is point size or fog
//  also sets the matching "uses_" flag in the context.
//
// If the list node cannot be allocated, nothing is recorded; reglist_insert()
//  has already flagged the context as out of memory.
static void add_attribute_register(Context *ctx, const RegisterType rtype,
                                const int regnum, const MOJOSHADER_usage usage,
                                const int index, const int writemask, int flags)
{
    RegisterList *item = reglist_insert(ctx, &ctx->attributes, rtype, regnum);
    if (item == NULL)
        return;  // out of memory; don't dereference a NULL node.

    item->usage = usage;
    item->index = index;
    item->writemask = writemask;
    item->misc = flags;

    if ((rtype == REG_TYPE_OUTPUT) && (usage == MOJOSHADER_USAGE_POINTSIZE))
        ctx->uses_pointsize = 1;  // note that we have to check this later.
    else if ((rtype == REG_TYPE_OUTPUT) && (usage == MOJOSHADER_USAGE_FOG))
        ctx->uses_fog = 1;  // note that we have to check this later.
} // add_attribute_register
641 
// Add (or update) a sampler register entry. If the caller supplied a sampler
//  map with an entry for this register number, that entry's type replaces
//  (ttype). The texture type is stored in the node's "index" field, and
//  (texbem) is OR'd into its "misc" field, so bits from repeated calls
//  accumulate.
//
// If the list node cannot be allocated, nothing is recorded; reglist_insert()
//  has already flagged the context as out of memory.
static inline void add_sampler(Context *ctx, const int regnum,
                               TextureType ttype, const int texbem)
{
    const RegisterType rtype = REG_TYPE_SAMPLER;

    // !!! FIXME: make sure it doesn't exist?
    // !!! FIXME:  (ps_1_1 assume we can add it multiple times...)
    RegisterList *item = reglist_insert(ctx, &ctx->samplers, rtype, regnum);
    if (item == NULL)
        return;  // out of memory; don't dereference a NULL node.

    if (ctx->samplermap != NULL)
    {
        unsigned int i;
        for (i = 0; i < ctx->samplermap_count; i++)
        {
            if (ctx->samplermap[i].index == regnum)
            {
                ttype = cvtMojoToD3DSamplerType(ctx->samplermap[i].type);
                break;  // first match wins.
            } // if
        } // for
    } // if

    item->index = (int) ttype;
    item->misc |= texbem;
} // add_sampler
667 
668 
// Non-zero if the writemask selects all four components (xyzw), which is
//  the same as having no explicit mask.
static inline int writemask_xyzw(const int writemask)
{
    const int full_mask = (0x1 | 0x2 | 0x4 | 0x8);  // 1111 == 0xF.
    return (writemask == full_mask);
} // writemask_xyzw
673 
674 
// Non-zero if the writemask selects exactly the x, y and z components.
static inline int writemask_xyz(const int writemask)
{
    const int xyz_mask = (0x1 | 0x2 | 0x4);  // 0111 == 0x7.
    return (writemask == xyz_mask);
} // writemask_xyz
679 
680 
// Non-zero if the writemask selects exactly the x and y components.
static inline int writemask_xy(const int writemask)
{
    const int xy_mask = (0x1 | 0x2);  // 0011 == 0x3.
    return (writemask == xy_mask);
} // writemask_xy
685 
686 
// Non-zero if the writemask selects only the x component.
static inline int writemask_x(const int writemask)
{
    const int x_mask = 0x1;  // 0001.
    return (writemask == x_mask);
} // writemask_x
691 
692 
// Non-zero if the writemask selects only the y component.
static inline int writemask_y(const int writemask)
{
    const int y_mask = 0x2;  // 0x2 == 0010.
    return (writemask == y_mask);
} // writemask_y
697 
698 
// Non-zero if all four two-bit channel selectors in the swizzle are equal
//  (one source channel replicated to every component). Only the low eight
//  bits are looked at.
static inline int replicate_swizzle(const int swizzle)
{
    const int x = (swizzle >> 0) & 0x3;
    const int y = (swizzle >> 2) & 0x3;
    const int z = (swizzle >> 4) & 0x3;
    const int w = (swizzle >> 6) & 0x3;
    return ((x == y) && (y == z) && (z == w));
} // replicate_swizzle
705 
706 
// Non-zero if the swizzle is the identity mapping (x, y, z, w in order).
static inline int no_swizzle(const int swizzle)
{
    // Channels 0 1 2 3 at two bits each: 11100100 == 0xE4.
    const int identity = ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6));
    return (swizzle == identity);
} // no_swizzle
711 
712 
// Count how many of the four component bits are set in a writemask.
//  Bits above the low four are ignored.
static inline int vecsize_from_writemask(const int m)
{
    int count = 0;
    int bit;
    for (bit = 0; bit < 4; bit++)
        count += ((m >> bit) & 1);
    return count;
} // vecsize_from_writemask
717 
718 
// Store a writemask in a destination argument: once as the combined mask,
//  and once split into the four per-component flags (bit 0 goes into
//  writemask0, up to bit 3 in writemask3).
static inline void set_dstarg_writemask(DestArgInfo *dst, const int mask)
{
    const int bit0 = (mask >> 0) & 1;
    const int bit1 = (mask >> 1) & 1;
    const int bit2 = (mask >> 2) & 1;
    const int bit3 = (mask >> 3) & 1;

    dst->writemask = mask;
    dst->writemask0 = bit0;
    dst->writemask1 = bit1;
    dst->writemask2 = bit2;
    dst->writemask3 = bit3;
} // set_dstarg_writemask
727 
728 
// Hand out the next scratch register index, and raise the high-water mark
//  (max_scratch_registers) if this allocation goes past it.
static int allocate_scratch_register(Context *ctx)
{
    const int assigned = ctx->scratch_registers;
    ctx->scratch_registers = assigned + 1;

    if (ctx->max_scratch_registers <= assigned)
        ctx->max_scratch_registers = assigned + 1;

    return assigned;
} // allocate_scratch_register
736 
// Hand out the next branch label id; ids count up from the context's
//  current assigned_branch_labels value.
static int allocate_branch_label(Context *ctx)
{
    const int label = ctx->assigned_branch_labels;
    ctx->assigned_branch_labels = label + 1;
    return label;
} // allocate_branch_label
741 
// Move the token cursor by (incr) tokens: the token pointer advances, the
//  remaining token count shrinks, and the byte position used for error
//  reporting moves by (incr) tokens' worth of bytes.
static inline void adjust_token_position(Context *ctx, const int incr)
{
    ctx->current_position += incr * sizeof (uint32);  // tracked in bytes.
    ctx->tokencount -= incr;
    ctx->tokens += incr;
} // adjust_token_position
748 
749 
750 // D3D stuff that's used in more than just the d3d profile...
751 
// Decide whether a register holds a scalar value.
//
// An output register that is found in the attribute list, in a shader that
//  uses point size and/or fog, is scalar exactly when its usage is one of
//  those the shader uses. Everything else is decided by scalar_register().
static int isscalar(Context *ctx, const MOJOSHADER_shaderType shader_type,
                    const RegisterType rtype, const int rnum)
{
    if (rtype == REG_TYPE_OUTPUT)
    {
        const int psize = ctx->uses_pointsize;
        const int fog = ctx->uses_fog;
        if (psize || fog)
        {
            const RegisterList *reg;
            reg = reglist_find(&ctx->attributes, rtype, rnum);
            if (reg != NULL)
            {
                if (psize && (reg->usage == MOJOSHADER_USAGE_POINTSIZE))
                    return 1;
                if (fog && (reg->usage == MOJOSHADER_USAGE_FOG))
                    return 1;
                return 0;
            } // if
        } // if
    } // if

    return scalar_register(shader_type, rtype, rnum);
} // isscalar
770 
// Channel letters for swizzle component values 0 through 3.
//  This is a plain char array, not a NUL-terminated string.
static const char swizzle_channels[] = { 'x', 'y', 'z', 'w' };


// Name suffixes, one per usage.
// NOTE(review): presumably indexed by MOJOSHADER_usage value, so the order
//  has to match that enum; confirm against its declaration.
static const char *usagestrs[] = {
    "_position", "_blendweight", "_blendindices", "_normal", "_psize",
    "_texcoord", "_tangent", "_binormal", "_tessfactor", "_positiont",
    "_color", "_fog", "_depth", "_sample"
};
779 
// Produce the D3D assembly spelling of a register.
//
// Returns the register's name prefix (a string literal, e.g. "r" or "oPos")
//  and writes the decimal register number into (regnum_str). Registers that
//  carry no number (rasterizer outputs, oDepth, aL, vPos/vFace) get an empty
//  string there instead.
//
// An unknown register type, or an unknown rasterizer/misc register number,
//  fails the context and yields "???" with an empty number string, so the
//  return value is never NULL.
static const char *get_D3D_register_string(Context *ctx,
                                           RegisterType regtype,
                                           int regnum, char *regnum_str,
                                           size_t regnum_size)
{
    const char *retval = NULL;
    int has_number = 1;

    switch (regtype)
    {
        case REG_TYPE_TEMP:
            retval = "r";
            break;

        case REG_TYPE_INPUT:
            retval = "v";
            break;

        case REG_TYPE_CONST:
            retval = "c";
            break;

        case REG_TYPE_ADDRESS:  // (or REG_TYPE_TEXTURE, same value.)
            retval = shader_is_vertex(ctx) ? "a" : "t";
            break;

        case REG_TYPE_RASTOUT:
            switch ((RastOutType) regnum)
            {
                case RASTOUT_TYPE_POSITION: retval = "oPos"; break;
                case RASTOUT_TYPE_FOG: retval = "oFog"; break;
                case RASTOUT_TYPE_POINT_SIZE: retval = "oPts"; break;
                default:  // don't hand a NULL name back to the caller.
                    fail(ctx, "unknown rastout register");
                    retval = "???";
                    break;
            } // switch
            has_number = 0;
            break;

        case REG_TYPE_ATTROUT:
            retval = "oD";
            break;

        case REG_TYPE_OUTPUT: // (or REG_TYPE_TEXCRDOUT, same value.)
            if (shader_is_vertex(ctx) && shader_version_atleast(ctx, 3, 0))
                retval = "o";
            else
                retval = "oT";
            break;

        case REG_TYPE_CONSTINT:
            retval = "i";
            break;

        case REG_TYPE_COLOROUT:
            retval = "oC";
            break;

        case REG_TYPE_DEPTHOUT:
            retval = "oDepth";
            has_number = 0;
            break;

        case REG_TYPE_SAMPLER:
            retval = "s";
            break;

        case REG_TYPE_CONSTBOOL:
            retval = "b";
            break;

        case REG_TYPE_LOOP:
            retval = "aL";
            has_number = 0;
            break;

        case REG_TYPE_MISCTYPE:
            switch ((MiscTypeType) regnum)
            {
                case MISCTYPE_TYPE_POSITION: retval = "vPos"; break;
                case MISCTYPE_TYPE_FACE: retval = "vFace"; break;
                default:  // don't hand a NULL name back to the caller.
                    fail(ctx, "unknown misc register");
                    retval = "???";
                    break;
            } // switch
            has_number = 0;
            break;

        case REG_TYPE_LABEL:
            retval = "l";
            break;

        case REG_TYPE_PREDICATE:
            retval = "p";
            break;

        //case REG_TYPE_TEMPFLOAT16:  // !!! FIXME: don't know this asm string
        default:
            fail(ctx, "unknown register type");
            retval = "???";
            has_number = 0;
            break;
    } // switch

    if (has_number)
        snprintf(regnum_str, regnum_size, "%u", (uint) regnum);
    else
        regnum_str[0] = '\0';

    return retval;
} // get_D3D_register_string
885 
886 
887 // !!! FIXME: can we split the profile code out to separate source files?
888 
889 #define AT_LEAST_ONE_PROFILE 0
890 
891 #if !SUPPORT_PROFILE_D3D
892 #define PROFILE_EMITTER_D3D(op)
893 #else
894 #undef AT_LEAST_ONE_PROFILE
895 #define AT_LEAST_ONE_PROFILE 1
896 #define PROFILE_EMITTER_D3D(op) emit_D3D_##op,
897 
// Format one source argument as D3D assembly text into (buf), which holds
//  (buflen) bytes, and return (buf).
//
// The output is the concatenation, in this order, of:
//   - a source-modifier prefix ("-", "1-", "!" or nothing),
//   - the register type string and register number,
//   - a source-modifier suffix ("_bias", "_bx2", "_x2", "_dz", "_dw",
//     "_abs" or nothing),
//   - for relative addressing, "[" + register + "." + component + "]",
//   - a swizzle (".xyzw"-style), unless the register is scalar or
//     no_swizzle() reports that none is needed.
//
// If the register (or the relative-addressing register) has no string,
//  fail() is called and (buf) is set to the empty string.
static const char *make_D3D_srcarg_string_in_buf(Context *ctx,
                                                 const SourceArgInfo *arg,
                                                 char *buf, size_t buflen)
{
    const char *premod_str = "";
    const char *postmod_str = "";
    switch (arg->src_mod)
    {
        case SRCMOD_NEGATE:
            premod_str = "-";
            break;

        case SRCMOD_BIASNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_BIAS:
            postmod_str = "_bias";
            break;

        case SRCMOD_SIGNNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_SIGN:
            postmod_str = "_bx2";
            break;

        case SRCMOD_COMPLEMENT:
            premod_str = "1-";
            break;

        case SRCMOD_X2NEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_X2:
            postmod_str = "_x2";
            break;

        case SRCMOD_DZ:
            postmod_str = "_dz";
            break;

        case SRCMOD_DW:
            postmod_str = "_dw";
            break;

        case SRCMOD_ABSNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_ABS:
            postmod_str = "_abs";
            break;

        case SRCMOD_NOT:
            premod_str = "!";
            break;

        case SRCMOD_NONE:
        case SRCMOD_TOTAL:
             break;  // stop compiler whining.
    } // switch


    char regnum_str[16];
    const char *regtype_str = get_D3D_register_string(ctx, arg->regtype,
                                                      arg->regnum, regnum_str,
                                                      sizeof (regnum_str));

    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown source register type.");
        *buf = '\0';
        return buf;
    } // if

    const char *rel_lbracket = "";
    const char *rel_rbracket = "";
    char rel_swizzle[4] = { '\0' };
    char rel_regnum_str[16] = { '\0' };
    const char *rel_regtype_str = "";
    if (arg->relative)
    {
        rel_swizzle[0] = '.';
        rel_swizzle[1] = swizzle_channels[arg->relative_component];
        rel_swizzle[2] = '\0';
        rel_lbracket = "[";
        rel_rbracket = "]";
        rel_regtype_str = get_D3D_register_string(ctx, arg->relative_regtype,
                                                  arg->relative_regnum,
                                                  rel_regnum_str,
                                                  sizeof (rel_regnum_str));

        // Validate the *relative* register's string here. (regtype_str was
        //  already checked above; testing it again would let a NULL
        //  rel_regtype_str reach the "%s" in snprintf() below.)
        if (rel_regtype_str == NULL)
        {
            fail(ctx, "Unknown relative source register type.");
            *buf = '\0';
            return buf;
        } // if
    } // if

    char swizzle_str[6];  // '.', up to four channels, and a terminator.
    size_t i = 0;
    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
    if (!scalar && !no_swizzle(arg->swizzle))
    {
        swizzle_str[i++] = '.';
        swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
        swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
        swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
        swizzle_str[i++] = swizzle_channels[arg->swizzle_w];

        // .xyzz is the same as .xyz, .z is the same as .zzzz, etc.
        // This always stops by i == 2 at the latest, since the '.' at
        //  index 0 never matches a channel letter.
        while (swizzle_str[i-1] == swizzle_str[i-2])
            i--;
    } // if
    swizzle_str[i] = '\0';
    assert(i < sizeof (swizzle_str));

    // !!! FIXME: c12[a0.x] actually needs to be c[a0.x + 12]
    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s",
             premod_str, regtype_str, regnum_str, postmod_str,
             rel_lbracket, rel_regtype_str, rel_regnum_str, rel_swizzle,
             rel_rbracket, swizzle_str);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_D3D_srcarg_string_in_buf
1023 
1024 
// Format the current instruction's destination argument (ctx->dest_arg) as
//  D3D assembly text into (buf), which holds (buflen) bytes; returns (buf).
//
// The text is: result-shift suffix, "_sat"/"_pp"/"_centroid" suffixes, a
//  single space, an optional "(predicate) " group when ctx->predicated is
//  set, then the register name and an optional writemask.
//
// If the register has no string, fail() is called and (buf) becomes "".
static const char *make_D3D_destarg_string(Context *ctx, char *buf,
                                           const size_t buflen)
{
    const DestArgInfo *dest = &ctx->dest_arg;

    // instruction modifiers that get glued onto the opcode name.
    const char *shift = "";
    switch (dest->result_shift)
    {
        case 0x1: shift = "_x2"; break;
        case 0x2: shift = "_x4"; break;
        case 0x3: shift = "_x8"; break;
        case 0xD: shift = "_d8"; break;
        case 0xE: shift = "_d4"; break;
        case 0xF: shift = "_d2"; break;
    } // switch

    const char *saturate = "";
    const char *partial = "";
    const char *centroid = "";
    if (dest->result_mod & MOD_SATURATE)
        saturate = "_sat";
    if (dest->result_mod & MOD_PP)
        partial = "_pp";
    if (dest->result_mod & MOD_CENTROID)
        centroid = "_centroid";

    // the register itself.
    char numstr[16];
    const char *typestr = get_D3D_register_string(ctx, dest->regtype,
                                                  dest->regnum, numstr,
                                                  sizeof (numstr));
    if (typestr == NULL)
    {
        fail(ctx, "Unknown destination register type.");
        buf[0] = '\0';
        return buf;
    } // if

    // writemask: omitted for scalar registers and for a full mask.
    char mask[6];  // '.', up to four channels, and a terminator.
    size_t masklen = 0;
    const int scalar = isscalar(ctx, ctx->shader_type, dest->regtype, dest->regnum);
    if (!scalar && !writemask_xyzw(dest->writemask))
    {
        mask[masklen++] = '.';
        if (dest->writemask0) mask[masklen++] = 'x';
        if (dest->writemask1) mask[masklen++] = 'y';
        if (dest->writemask2) mask[masklen++] = 'z';
        if (dest->writemask3) mask[masklen++] = 'w';
    } // if
    mask[masklen] = '\0';
    assert(masklen < sizeof (mask));

    // predicate register, if this instruction is predicated.
    char pred[32] = { '\0' };
    const int predicated = (ctx->predicated) ? 1 : 0;
    if (predicated)
    {
        make_D3D_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
                                      pred, sizeof (pred));
    } // if

    // may turn out something like "_x2_sat_pp_centroid (!p0.x) r0.xyzw" ...
    snprintf(buf, buflen, "%s%s%s%s %s%s%s%s%s%s",
             shift, saturate, partial, centroid,
             predicated ? "(" : "", pred, predicated ? ") " : "",
             typestr, numstr, mask);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_D3D_destarg_string
1089 
1090 
// Format source argument number (idx) of the current instruction into (buf)
//  and return (buf). An out-of-range (idx) calls fail() and yields "".
static const char *make_D3D_srcarg_string(Context *ctx, const size_t idx,
                                          char *buf, size_t buflen)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        return make_D3D_srcarg_string_in_buf(ctx, &ctx->source_args[idx],
                                             buf, buflen);
    } // if

    fail(ctx, "Too many source args");
    buf[0] = '\0';
    return buf;
} // make_D3D_srcarg_string
1104 
// Write the D3D name of register (rt, regnum) -- register type string
//  followed by the register number string -- into (buf), which holds (len)
//  bytes. Returns (buf).
static const char *get_D3D_varname_in_buf(Context *ctx, RegisterType rt,
                                           int regnum, char *buf,
                                           const size_t len)
{
    char number[16];
    const char *prefix = get_D3D_register_string(ctx, rt, regnum,
                                                 number, sizeof (number));
    snprintf(buf, len, "%s%s", prefix, number);
    return buf;
} // get_D3D_varname_in_buf
1115 
1116 
// Build the D3D name of register (rt, regnum) in a local scratch buffer and
//  return the copy made by StrDup().
static const char *get_D3D_varname(Context *ctx, RegisterType rt, int regnum)
{
    char scratch[64];
    const char *name = get_D3D_varname_in_buf(ctx, rt, regnum,
                                              scratch, sizeof (scratch));
    return StrDup(ctx, name);
} // get_D3D_varname
1123 
1124 
// Build the name "c_array_<base>_<size>" and return the copy made by
//  StrDup().
static const char *get_D3D_const_array_varname(Context *ctx, int base, int size)
{
    char scratch[64];
    snprintf(scratch, sizeof (scratch), "c_array_%d_%d", base, size);
    return StrDup(ctx, scratch);
} // get_D3D_const_array_varname
1131 
1132 
// Emit the version line that opens a D3D assembly listing, in the form
//  "<shader type>_<major>_<minor>", and flag the context as ignoring the
//  CTAB. (profilestr) is not used here.
//
// The minor part is "sw" for minor version 0xFF, "x" for minor version 1 on
//  major versions above 1, and the decimal minor version otherwise.
static void emit_D3D_start(Context *ctx, const char *profilestr)
{
    const uint major = (uint) ctx->major_ver;
    const uint minor = (uint) ctx->minor_ver;
    char numeric[16];
    const char *minor_str = numeric;

    ctx->ignores_ctab = 1;

    if (minor == 0xFF)
        minor_str = "sw";
    else if ((major > 1) && (minor == 1))
        minor_str = "x";  // for >= SM2, apparently this is "x". Weird.
    else
        snprintf(numeric, sizeof (numeric), "%u", (uint) minor);

    output_line(ctx, "%s_%u_%s", ctx->shader_type_str, major, minor_str);
} // emit_D3D_start
1150 
1151 
// Emit the "end" line for the D3D profile.
static void emit_D3D_end(Context *ctx)
{
    output_line(ctx, "end");
} // emit_D3D_end
1156 
1157 
// Emit a "phase" line for the D3D profile.
static void emit_D3D_phase(Context *ctx)
{
    output_line(ctx, "phase");
} // emit_D3D_phase
1162 
1163 
// Finalize hook for the D3D profile; this profile has nothing to do here.
static void emit_D3D_finalize(Context *ctx)
{
    // no-op.
} // emit_D3D_finalize
1168 
1169 
// Global-register hook for the D3D profile; emits nothing.
static void emit_D3D_global(Context *ctx, RegisterType regtype, int regnum)
{
    // no-op.
} // emit_D3D_global
1174 
1175 
// Array hook for the D3D profile; emits nothing.
static void emit_D3D_array(Context *ctx, VariableList *var)
{
    // no-op.
} // emit_D3D_array
1180 
1181 
// Constant-array hook for the D3D profile; emits nothing.
static void emit_D3D_const_array(Context *ctx, const ConstantsList *clist,
                                 int base, int size)
{
    // no-op.
} // emit_D3D_const_array
1187 
1188 
// Uniform hook for the D3D profile; emits nothing.
static void emit_D3D_uniform(Context *ctx, RegisterType regtype, int regnum,
                             const VariableList *var)
{
    // no-op.
} // emit_D3D_uniform
1194 
1195 
// Sampler hook for the D3D profile; emits nothing.
static void emit_D3D_sampler(Context *ctx, int s, TextureType ttype, int tb)
{
    // no-op.
} // emit_D3D_sampler
1200 
1201 
// Attribute hook for the D3D profile; emits nothing.
static void emit_D3D_attribute(Context *ctx, RegisterType regtype, int regnum,
                               MOJOSHADER_usage usage, int index, int wmask,
                               int flags)
{
    // no-op.
} // emit_D3D_attribute
1208 
1209 
// Emitter for the RESERVED opcode in the D3D profile; emits nothing.
static void emit_D3D_RESERVED(Context *ctx)
{
    // do nothing; fails in the state machine.
    // NOTE(review): the state-machine failure happens outside this
    //  function -- confirm against the RESERVED state handler.
} // emit_D3D_RESERVED
1214 
1215 
// Generic D3D opcode emitters. A list of macros generates all the entry
//  points that call into these...
1218 
// Copy the NUL-terminated string (src) into (dst), converting 'A'..'Z' to
//  'a'..'z' and leaving every other character alone. The terminator is
//  copied too, so (dst) needs room for strlen(src) + 1 bytes.
// Returns (dst).
static char *lowercase(char *dst, const char *src)
{
    char *out = dst;
    for (;;)
    {
        char ch = *src++;
        if ((ch >= 'A') && (ch <= 'Z'))
            ch = (char) (ch + ('a' - 'A'));
        *out++ = ch;
        if (ch == '\0')
            break;  // terminator has been written; we're done.
    } // for
    return dst;
} // lowercase
1229 
1230 
// Emit "<opcode><dest>" for an instruction that has only a destination
//  argument. The opcode is lowercased, and prefixed with "+" when
//  ctx->coissue is set.
static void emit_D3D_opcode_d(Context *ctx, const char *opcode)
{
    char dst[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    char *lower = (char *) alloca(strlen(opcode) + 1);
    lowercase(lower, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s", prefix, lower, dst);
} // emit_D3D_opcode_d
1237 
1238 
// Emit "<opcode> <src0>" for an instruction that has one source argument
//  and no destination. The opcode is lowercased, and prefixed with "+" when
//  ctx->coissue is set.
static void emit_D3D_opcode_s(Context *ctx, const char *opcode)
{
    char src0[64];
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));

    char *lower = (char *) alloca(strlen(opcode) + 1);
    lowercase(lower, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s %s", prefix, lower, src0);
} // emit_D3D_opcode_s
1245 
1246 
// Emit "<opcode> <src0>, <src1>" for an instruction that has two source
//  arguments and no destination. The opcode is lowercased, and prefixed
//  with "+" when ctx->coissue is set.
static void emit_D3D_opcode_ss(Context *ctx, const char *opcode)
{
    char src0[64];
    char src1[64];
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));

    char *lower = (char *) alloca(strlen(opcode) + 1);
    lowercase(lower, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s %s, %s", prefix, lower, src0, src1);
} // emit_D3D_opcode_ss
1254 
1255 
// Emit "<opcode><dest>, <src0>" for an instruction that has a destination
//  and one source argument. The opcode is lowercased, and prefixed with "+"
//  when ctx->coissue is set.
static void emit_D3D_opcode_ds(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));

    char *lower = (char *) alloca(strlen(opcode) + 1);
    lowercase(lower, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s, %s", prefix, lower, dst, src0);
} // emit_D3D_opcode_ds
1263 
1264 
// Emit "<opcode><dest>, <src0>, <src1>" for an instruction that has a
//  destination and two source arguments. The opcode is lowercased, and
//  prefixed with "+" when ctx->coissue is set.
static void emit_D3D_opcode_dss(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    char src1[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));

    char *lower = (char *) alloca(strlen(opcode) + 1);
    lowercase(lower, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s, %s, %s", prefix, lower, dst, src0, src1);
} // emit_D3D_opcode_dss
1274 
1275 
// Emit "<opcode><dest>, <src0>, <src1>, <src2>" for an instruction that has
//  a destination and three source arguments. The opcode is lowercased, and
//  prefixed with "+" when ctx->coissue is set.
static void emit_D3D_opcode_dsss(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    char src1[64];
    char src2[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
    make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2));

    char *lower = (char *) alloca(strlen(opcode) + 1);
    lowercase(lower, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s%s, %s, %s, %s", prefix,
                lower, dst, src0, src1, src2);
} // emit_D3D_opcode_dsss
1286 
1287 
// Emit "<opcode><dest>, <src0>, <src1>, <src2>, <src3>" for an instruction
//  that has a destination and four source arguments. The opcode is
//  lowercased, and prefixed with "+" when ctx->coissue is set.
static void emit_D3D_opcode_dssss(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    char src1[64];
    char src2[64];
    char src3[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));
    make_D3D_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_D3D_srcarg_string(ctx, 1, src1, sizeof (src1));
    make_D3D_srcarg_string(ctx, 2, src2, sizeof (src2));
    make_D3D_srcarg_string(ctx, 3, src3, sizeof (src3));

    char *lower = (char *) alloca(strlen(opcode) + 1);
    lowercase(lower, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx,"%s%s%s, %s, %s, %s, %s", prefix,
                lower, dst, src0, src1, src2, src3);
} // emit_D3D_opcode_dssss
1299 
1300 
// Emit a bare "<opcode>" line for an instruction with no arguments. The
//  opcode is lowercased, and prefixed with "+" when ctx->coissue is set.
static void emit_D3D_opcode(Context *ctx, const char *opcode)
{
    char *lower = (char *) alloca(strlen(opcode) + 1);
    lowercase(lower, opcode);

    const char *prefix = ctx->coissue ? "+" : "";
    output_line(ctx, "%s%s", prefix, lower);
} // emit_D3D_opcode
1306 
1307 
// Each macro below defines a function named emit_D3D_<op> that forwards to
//  one of the generic emitters, passing the opcode name as a string (#op).
//  The letters before _FUNC name the argument layout: D is a destination
//  argument, each S is one source argument, and no letters means the
//  instruction takes no arguments.
#define EMIT_D3D_OPCODE_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_D_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_d(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_S_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_s(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_SS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_ss(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_DS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_ds(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_DSS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_dss(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_DSSS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_dsss(ctx, #op); \
    }
#define EMIT_D3D_OPCODE_DSSSS_FUNC(op) \
    static void emit_D3D_##op(Context *ctx) { \
        emit_D3D_opcode_dssss(ctx, #op); \
    }
1340 
// One emit_D3D_<op> function per opcode that needs no special handling.
//  The macro chosen for each opcode fixes its argument layout (D is a
//  destination argument, each S is one source argument); the opcode name is
//  passed in uppercase and lowercased by the generic emitter.
EMIT_D3D_OPCODE_FUNC(NOP)
EMIT_D3D_OPCODE_DS_FUNC(MOV)
EMIT_D3D_OPCODE_DSS_FUNC(ADD)
EMIT_D3D_OPCODE_DSS_FUNC(SUB)
EMIT_D3D_OPCODE_DSSS_FUNC(MAD)
EMIT_D3D_OPCODE_DSS_FUNC(MUL)
EMIT_D3D_OPCODE_DS_FUNC(RCP)
EMIT_D3D_OPCODE_DS_FUNC(RSQ)
EMIT_D3D_OPCODE_DSS_FUNC(DP3)
EMIT_D3D_OPCODE_DSS_FUNC(DP4)
EMIT_D3D_OPCODE_DSS_FUNC(MIN)
EMIT_D3D_OPCODE_DSS_FUNC(MAX)
EMIT_D3D_OPCODE_DSS_FUNC(SLT)
EMIT_D3D_OPCODE_DSS_FUNC(SGE)
EMIT_D3D_OPCODE_DS_FUNC(EXP)
EMIT_D3D_OPCODE_DS_FUNC(LOG)
EMIT_D3D_OPCODE_DS_FUNC(LIT)
EMIT_D3D_OPCODE_DSS_FUNC(DST)
EMIT_D3D_OPCODE_DSSS_FUNC(LRP)
EMIT_D3D_OPCODE_DS_FUNC(FRC)
EMIT_D3D_OPCODE_DSS_FUNC(M4X4)
EMIT_D3D_OPCODE_DSS_FUNC(M4X3)
EMIT_D3D_OPCODE_DSS_FUNC(M3X4)
EMIT_D3D_OPCODE_DSS_FUNC(M3X3)
EMIT_D3D_OPCODE_DSS_FUNC(M3X2)
EMIT_D3D_OPCODE_S_FUNC(CALL)
EMIT_D3D_OPCODE_SS_FUNC(CALLNZ)
EMIT_D3D_OPCODE_SS_FUNC(LOOP)
EMIT_D3D_OPCODE_FUNC(RET)
EMIT_D3D_OPCODE_FUNC(ENDLOOP)
EMIT_D3D_OPCODE_S_FUNC(LABEL)
EMIT_D3D_OPCODE_DSS_FUNC(POW)
EMIT_D3D_OPCODE_DSS_FUNC(CRS)
EMIT_D3D_OPCODE_DSSS_FUNC(SGN)
EMIT_D3D_OPCODE_DS_FUNC(ABS)
EMIT_D3D_OPCODE_DS_FUNC(NRM)
EMIT_D3D_OPCODE_S_FUNC(REP)
EMIT_D3D_OPCODE_FUNC(ENDREP)
EMIT_D3D_OPCODE_S_FUNC(IF)
EMIT_D3D_OPCODE_FUNC(ELSE)
EMIT_D3D_OPCODE_FUNC(ENDIF)
EMIT_D3D_OPCODE_FUNC(BREAK)
EMIT_D3D_OPCODE_DS_FUNC(MOVA)
EMIT_D3D_OPCODE_D_FUNC(TEXKILL)
EMIT_D3D_OPCODE_DS_FUNC(TEXBEM)
EMIT_D3D_OPCODE_DS_FUNC(TEXBEML)
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2AR)
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2GB)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2PAD)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2TEX)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3PAD)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3TEX)
EMIT_D3D_OPCODE_DSS_FUNC(TEXM3X3SPEC)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3VSPEC)
EMIT_D3D_OPCODE_DS_FUNC(EXPP)
EMIT_D3D_OPCODE_DS_FUNC(LOGP)
EMIT_D3D_OPCODE_DSSS_FUNC(CND)
EMIT_D3D_OPCODE_DS_FUNC(TEXREG2RGB)
EMIT_D3D_OPCODE_DS_FUNC(TEXDP3TEX)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X2DEPTH)
EMIT_D3D_OPCODE_DS_FUNC(TEXDP3)
EMIT_D3D_OPCODE_DS_FUNC(TEXM3X3)
EMIT_D3D_OPCODE_D_FUNC(TEXDEPTH)
EMIT_D3D_OPCODE_DSSS_FUNC(CMP)
EMIT_D3D_OPCODE_DSS_FUNC(BEM)
EMIT_D3D_OPCODE_DSSS_FUNC(DP2ADD)
EMIT_D3D_OPCODE_DS_FUNC(DSX)
EMIT_D3D_OPCODE_DS_FUNC(DSY)
EMIT_D3D_OPCODE_DSSSS_FUNC(TEXLDD)
EMIT_D3D_OPCODE_DSS_FUNC(TEXLDL)
EMIT_D3D_OPCODE_S_FUNC(BREAKP)
1412 
1413 // special cases for comparison opcodes...
1414 static const char *get_D3D_comparison_string(Context *ctx)
1415 {
1416     static const char *comps[] = {
1417         "", "_gt", "_eq", "_ge", "_lt", "_ne", "_le"
1418     };
1419 
1420     if (ctx->instruction_controls >= STATICARRAYLEN(comps))
1421     {
1422         fail(ctx, "unknown comparison control");
1423         return "";
1424     } // if
1425 
1426     return comps[ctx->instruction_controls];
1427 } // get_D3D_comparison_string
1428 
// Emit "break<comparison> src0, src1", where the comparison suffix comes
//  from the current instruction's controls.
static void emit_D3D_BREAKC(Context *ctx)
{
    char opname[16];
    const char *cmp = get_D3D_comparison_string(ctx);
    snprintf(opname, sizeof (opname), "break%s", cmp);
    emit_D3D_opcode_ss(ctx, opname);
} // emit_D3D_BREAKC
1435 
// Emit "if<comparison> src0, src1", where the comparison suffix comes from
//  the current instruction's controls.
static void emit_D3D_IFC(Context *ctx)
{
    char opname[16];
    const char *cmp = get_D3D_comparison_string(ctx);
    snprintf(opname, sizeof (opname), "if%s", cmp);
    emit_D3D_opcode_ss(ctx, opname);
} // emit_D3D_IFC
1442 
// Emit "setp<comparison><dest>, src0, src1", where the comparison suffix
//  comes from the current instruction's controls.
static void emit_D3D_SETP(Context *ctx)
{
    char opname[16];
    const char *cmp = get_D3D_comparison_string(ctx);
    snprintf(opname, sizeof (opname), "setp%s", cmp);
    emit_D3D_opcode_dss(ctx, opname);
} // emit_D3D_SETP
1449 
// Emit a "def" line: the destination register followed by the four values
//  in ctx->dwords, read as floats and formatted by floatstr().
static void emit_D3D_DEF(Context *ctx)
{
    char dst[64];
    char valstr[4][32];
    int i;

    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    const float *fvals = (const float *) ctx->dwords; // !!! FIXME: could be int?
    for (i = 0; i < 4; i++)
        floatstr(ctx, valstr[i], sizeof (valstr[i]), fvals[i], 0);

    output_line(ctx, "def%s, %s, %s, %s, %s", dst,
                valstr[0], valstr[1], valstr[2], valstr[3]);
} // emit_D3D_DEF
1465 
// Emit a "defi" line: the destination register followed by the four values
//  in ctx->dwords, read as signed 32-bit integers.
static void emit_D3D_DEFI(Context *ctx)
{
    char dst[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    const int32 *ivals = (const int32 *) ctx->dwords;
    const int v0 = (int) ivals[0];
    const int v1 = (int) ivals[1];
    const int v2 = (int) ivals[2];
    const int v3 = (int) ivals[3];
    output_line(ctx, "defi%s, %d, %d, %d, %d", dst, v0, v1, v2, v3);
} // emit_D3D_DEFI
1474 
// Emit a "defb" line: the destination register followed by "true" when
//  ctx->dwords[0] is nonzero and "false" otherwise.
static void emit_D3D_DEFB(Context *ctx)
{
    char dst[64];
    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    const char *boolstr = "false";
    if (ctx->dwords[0])
        boolstr = "true";

    output_line(ctx, "defb%s, %s", dst, boolstr);
} // emit_D3D_DEFB
1481 
1482 
// Emit a "dcl" line for the current destination register.
//
//  - Sampler registers get a texture-type suffix ("_2d", "_cube",
//    "_volume") chosen by ctx->dwords[0].
//  - MISCTYPE registers (position/face) get no suffix.
//  - Everything else gets a usage string looked up by ctx->dwords[0], plus
//    the usage index from ctx->dwords[1] when that index is nonzero.
//
// An unknown texture type or misc register calls fail() and emits nothing.
static void emit_D3D_DCL(Context *ctx)
{
    const DestArgInfo *dest = &ctx->dest_arg;
    const char *usage_part = "";
    char index_part[16] = { '\0' };
    char dst[64];

    make_D3D_destarg_string(ctx, dst, sizeof (dst));

    switch (dest->regtype)
    {
        case REG_TYPE_SAMPLER:
        {
            const TextureType ttype = (const TextureType) ctx->dwords[0];
            if (ttype == TEXTURE_TYPE_2D)
                usage_part = "_2d";
            else if (ttype == TEXTURE_TYPE_CUBE)
                usage_part = "_cube";
            else if (ttype == TEXTURE_TYPE_VOLUME)
                usage_part = "_volume";
            else
            {
                fail(ctx, "unknown sampler texture type");
                return;
            } // else
            break;
        } // case

        case REG_TYPE_MISCTYPE:
        {
            const MiscTypeType mtype = (const MiscTypeType) dest->regnum;
            if ((mtype != MISCTYPE_TYPE_POSITION) &&
                (mtype != MISCTYPE_TYPE_FACE))
            {
                fail(ctx, "unknown misc register type");
                return;
            } // if
            // no suffix: just become "dcl vFace" or whatever.
            break;
        } // case

        default:
        {
            const uint32 usage = ctx->dwords[0];
            const uint32 index = ctx->dwords[1];
            // NOTE(review): (usage) is not range-checked here; presumably
            //  it is validated before this emitter runs -- confirm.
            usage_part = usagestrs[usage];
            if (index != 0)
                snprintf(index_part, sizeof (index_part), "%u", (uint) index);
            break;
        } // default
    } // switch

    output_line(ctx, "dcl%s%s%s", usage_part, index_part, dst);
} // emit_D3D_DCL
1525 
1526 
// Emit the texture-coordinate opcode. Shader versions below 1.4 use
//  "texcoord" with only a destination; 1.4 and later use "texcrd" with a
//  destination and one source.
static void emit_D3D_TEXCRD(Context *ctx)
{
    // this opcode looks and acts differently depending on the shader model.
    if (!shader_version_atleast(ctx, 1, 4))
        emit_D3D_opcode_d(ctx, "texcoord");
    else
        emit_D3D_opcode_ds(ctx, "texcrd");
} // emit_D3D_TEXCRD
1535 
// Emit the texture-load opcode, whose spelling and arguments depend on the
//  shader version:
//   - 2.0 and later: "texld", "texldp" or "texldb" (dest + two sources),
//     chosen by ctx->instruction_controls; any other control value emits
//     nothing.
//   - 1.4 up to (not including) 2.0: "texld" (dest + one source).
//   - below 1.4: "tex" (dest only).
static void emit_D3D_TEXLD(Context *ctx)
{
    // this opcode looks and acts differently depending on the shader model.
    if (shader_version_atleast(ctx, 2, 0))
    {
        const char *opname = NULL;
        if (ctx->instruction_controls == CONTROL_TEXLD)
            opname = "texld";
        else if (ctx->instruction_controls == CONTROL_TEXLDP)
            opname = "texldp";
        else if (ctx->instruction_controls == CONTROL_TEXLDB)
            opname = "texldb";

        if (opname != NULL)
            emit_D3D_opcode_dss(ctx, opname);
        return;
    } // if

    if (shader_version_atleast(ctx, 1, 4))
    {
        emit_D3D_opcode_ds(ctx, "texld");
        return;
    } // if

    emit_D3D_opcode_d(ctx, "tex");
} // emit_D3D_TEXLD
1559 
// Emit "sincos". Shader model 3.0 and later take a destination and one
//  source; earlier models take a destination and three sources.
static void emit_D3D_SINCOS(Context *ctx)
{
    // this opcode needs extra registers for sm2 and lower.
    if (shader_version_atleast(ctx, 3, 0))
        emit_D3D_opcode_ds(ctx, "sincos");
    else
        emit_D3D_opcode_dsss(ctx, "sincos");
} // emit_D3D_SINCOS
1568 
1569 
1570 #undef EMIT_D3D_OPCODE_FUNC
1571 #undef EMIT_D3D_OPCODE_D_FUNC
1572 #undef EMIT_D3D_OPCODE_S_FUNC
1573 #undef EMIT_D3D_OPCODE_SS_FUNC
1574 #undef EMIT_D3D_OPCODE_DS_FUNC
1575 #undef EMIT_D3D_OPCODE_DSS_FUNC
1576 #undef EMIT_D3D_OPCODE_DSSS_FUNC
1577 #undef EMIT_D3D_OPCODE_DSSSS_FUNC
1578 
1579 #endif  // SUPPORT_PROFILE_D3D
1580 
1581 
1582 #if !SUPPORT_PROFILE_BYTECODE
1583 #define PROFILE_EMITTER_BYTECODE(op)
1584 #else
1585 #undef AT_LEAST_ONE_PROFILE
1586 #define AT_LEAST_ONE_PROFILE 1
1587 #define PROFILE_EMITTER_BYTECODE(op) emit_BYTECODE_##op,
1588 
// Start hook for the bytecode profile: flag the context as ignoring the
//  CTAB and, if the mainline output can be selected, append the entire
//  token stream to it. (profilestr) is not used here.
static void emit_BYTECODE_start(Context *ctx, const char *profilestr)
{
    ctx->ignores_ctab = 1;

    // just copy the whole token stream and make all other emitters no-ops.
    if (!set_output(ctx, &ctx->mainline))
        return;

    const size_t nbytes = ctx->tokencount * sizeof (uint32);
    buffer_append(ctx->mainline, (const char *) ctx->tokens, nbytes);
} // emit_BYTECODE_start
1600 
// The remaining bytecode-profile hooks are all empty: the whole token
//  stream was already copied out by emit_BYTECODE_start().
static void emit_BYTECODE_end(Context *ctx) {}
static void emit_BYTECODE_phase(Context *ctx) {}
static void emit_BYTECODE_finalize(Context *ctx) {}
static void emit_BYTECODE_global(Context *ctx, RegisterType t, int n) {}
static void emit_BYTECODE_array(Context *ctx, VariableList *var) {}
static void emit_BYTECODE_sampler(Context *c, int s, TextureType t, int tb) {}
static void emit_BYTECODE_const_array(Context *ctx, const ConstantsList *c,
                                         int base, int size) {}
static void emit_BYTECODE_uniform(Context *ctx, RegisterType t, int n,
                                  const VariableList *var) {}
static void emit_BYTECODE_attribute(Context *ctx, RegisterType t, int n,
                                       MOJOSHADER_usage u, int i, int w,
                                       int f) {}
1614 
// Build the name of register (rt, regnum) -- the D3D register type string
//  followed by the register number string -- and return the copy made by
//  StrDup().
static const char *get_BYTECODE_varname(Context *ctx, RegisterType rt, int regnum)
{
    char number[16];
    char scratch[64];
    const char *prefix = get_D3D_register_string(ctx, rt, regnum,
                                                 number, sizeof (number));
    snprintf(scratch, sizeof (scratch), "%s%s", prefix, number);
    return StrDup(ctx, scratch);
} // get_BYTECODE_varname
1624 
// Build the name "c_array_<base>_<size>" and return the copy made by
//  StrDup().
static const char *get_BYTECODE_const_array_varname(Context *ctx, int base, int size)
{
    char scratch[64];
    snprintf(scratch, sizeof (scratch), "c_array_%d_%d", base, size);
    return StrDup(ctx, scratch);
} // get_BYTECODE_const_array_varname
1631 
// Defines an empty emit_BYTECODE_<op> function. Every opcode emitter in the
//  bytecode profile is a no-op, since emit_BYTECODE_start() copies the whole
//  token stream up front.
#define EMIT_BYTECODE_OPCODE_FUNC(op) \
    static void emit_BYTECODE_##op(Context *ctx) {}

EMIT_BYTECODE_OPCODE_FUNC(RESERVED)
EMIT_BYTECODE_OPCODE_FUNC(NOP)
EMIT_BYTECODE_OPCODE_FUNC(MOV)
EMIT_BYTECODE_OPCODE_FUNC(ADD)
EMIT_BYTECODE_OPCODE_FUNC(SUB)
EMIT_BYTECODE_OPCODE_FUNC(MAD)
EMIT_BYTECODE_OPCODE_FUNC(MUL)
EMIT_BYTECODE_OPCODE_FUNC(RCP)
EMIT_BYTECODE_OPCODE_FUNC(RSQ)
EMIT_BYTECODE_OPCODE_FUNC(DP3)
EMIT_BYTECODE_OPCODE_FUNC(DP4)
EMIT_BYTECODE_OPCODE_FUNC(MIN)
EMIT_BYTECODE_OPCODE_FUNC(MAX)
EMIT_BYTECODE_OPCODE_FUNC(SLT)
EMIT_BYTECODE_OPCODE_FUNC(SGE)
EMIT_BYTECODE_OPCODE_FUNC(EXP)
EMIT_BYTECODE_OPCODE_FUNC(LOG)
EMIT_BYTECODE_OPCODE_FUNC(LIT)
EMIT_BYTECODE_OPCODE_FUNC(DST)
EMIT_BYTECODE_OPCODE_FUNC(LRP)
EMIT_BYTECODE_OPCODE_FUNC(FRC)
EMIT_BYTECODE_OPCODE_FUNC(M4X4)
EMIT_BYTECODE_OPCODE_FUNC(M4X3)
EMIT_BYTECODE_OPCODE_FUNC(M3X4)
EMIT_BYTECODE_OPCODE_FUNC(M3X3)
EMIT_BYTECODE_OPCODE_FUNC(M3X2)
EMIT_BYTECODE_OPCODE_FUNC(CALL)
EMIT_BYTECODE_OPCODE_FUNC(CALLNZ)
EMIT_BYTECODE_OPCODE_FUNC(LOOP)
EMIT_BYTECODE_OPCODE_FUNC(RET)
EMIT_BYTECODE_OPCODE_FUNC(ENDLOOP)
EMIT_BYTECODE_OPCODE_FUNC(LABEL)
EMIT_BYTECODE_OPCODE_FUNC(POW)
EMIT_BYTECODE_OPCODE_FUNC(CRS)
EMIT_BYTECODE_OPCODE_FUNC(SGN)
EMIT_BYTECODE_OPCODE_FUNC(ABS)
EMIT_BYTECODE_OPCODE_FUNC(NRM)
EMIT_BYTECODE_OPCODE_FUNC(SINCOS)
EMIT_BYTECODE_OPCODE_FUNC(REP)
EMIT_BYTECODE_OPCODE_FUNC(ENDREP)
EMIT_BYTECODE_OPCODE_FUNC(IF)
EMIT_BYTECODE_OPCODE_FUNC(ELSE)
EMIT_BYTECODE_OPCODE_FUNC(ENDIF)
EMIT_BYTECODE_OPCODE_FUNC(BREAK)
EMIT_BYTECODE_OPCODE_FUNC(MOVA)
EMIT_BYTECODE_OPCODE_FUNC(TEXKILL)
EMIT_BYTECODE_OPCODE_FUNC(TEXBEM)
EMIT_BYTECODE_OPCODE_FUNC(TEXBEML)
EMIT_BYTECODE_OPCODE_FUNC(TEXREG2AR)
EMIT_BYTECODE_OPCODE_FUNC(TEXREG2GB)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2PAD)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2TEX)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3PAD)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3TEX)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3SPEC)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3VSPEC)
EMIT_BYTECODE_OPCODE_FUNC(EXPP)
EMIT_BYTECODE_OPCODE_FUNC(LOGP)
EMIT_BYTECODE_OPCODE_FUNC(CND)
EMIT_BYTECODE_OPCODE_FUNC(TEXREG2RGB)
EMIT_BYTECODE_OPCODE_FUNC(TEXDP3TEX)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X2DEPTH)
EMIT_BYTECODE_OPCODE_FUNC(TEXDP3)
EMIT_BYTECODE_OPCODE_FUNC(TEXM3X3)
EMIT_BYTECODE_OPCODE_FUNC(TEXDEPTH)
EMIT_BYTECODE_OPCODE_FUNC(CMP)
EMIT_BYTECODE_OPCODE_FUNC(BEM)
EMIT_BYTECODE_OPCODE_FUNC(DP2ADD)
EMIT_BYTECODE_OPCODE_FUNC(DSX)
EMIT_BYTECODE_OPCODE_FUNC(DSY)
EMIT_BYTECODE_OPCODE_FUNC(TEXLDD)
EMIT_BYTECODE_OPCODE_FUNC(TEXLDL)
EMIT_BYTECODE_OPCODE_FUNC(BREAKP)
EMIT_BYTECODE_OPCODE_FUNC(BREAKC)
EMIT_BYTECODE_OPCODE_FUNC(IFC)
EMIT_BYTECODE_OPCODE_FUNC(SETP)
EMIT_BYTECODE_OPCODE_FUNC(DEF)
EMIT_BYTECODE_OPCODE_FUNC(DEFI)
EMIT_BYTECODE_OPCODE_FUNC(DEFB)
EMIT_BYTECODE_OPCODE_FUNC(DCL)
EMIT_BYTECODE_OPCODE_FUNC(TEXCRD)
EMIT_BYTECODE_OPCODE_FUNC(TEXLD)
1717 
1718 #undef EMIT_BYTECODE_OPCODE_FUNC
1719 
1720 #endif  // SUPPORT_PROFILE_BYTECODE
1721 
1722 
// GLSL profile plumbing. When the GLSL profile is compiled out,
//  PROFILE_EMITTER_GLSL(op) expands to nothing; otherwise it expands to the
//  name of that opcode's GLSL emitter followed by a comma, and
//  AT_LEAST_ONE_PROFILE is (re)defined to 1.
#if !SUPPORT_PROFILE_GLSL
#define PROFILE_EMITTER_GLSL(op)
#else
#undef AT_LEAST_ONE_PROFILE
#define AT_LEAST_ONE_PROFILE 1
#define PROFILE_EMITTER_GLSL(op) emit_GLSL_##op,

// Defines emit_GLSL_<op> as a stub that does nothing but report, through
//  fail(), that the opcode is unimplemented in the glsl profile.
#define EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(op) \
    static void emit_GLSL_##op(Context *ctx) { \
        fail(ctx, #op " unimplemented in glsl profile"); \
    }
1734 
1735 static inline const char *get_GLSL_register_string(Context *ctx,
1736                         const RegisterType regtype, const int regnum,
1737                         char *regnum_str, const size_t regnum_size)
1738 {
1739     // turns out these are identical at the moment.
1740     return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
1741 } // get_GLSL_register_string
1742 
1743 static const char *get_GLSL_uniform_type(Context *ctx, const RegisterType rtype)
1744 {
1745     switch (rtype)
1746     {
1747         case REG_TYPE_CONST: return "vec4";
1748         case REG_TYPE_CONSTINT: return "ivec4";
1749         case REG_TYPE_CONSTBOOL: return "bool";
1750         default: fail(ctx, "BUG: used a uniform we don't know how to define.");
1751     } // switch
1752 
1753     return NULL;
1754 } // get_GLSL_uniform_type
1755 
1756 static const char *get_GLSL_varname_in_buf(Context *ctx, RegisterType rt,
1757                                            int regnum, char *buf,
1758                                            const size_t len)
1759 {
1760     char regnum_str[16];
1761     const char *regtype_str = get_GLSL_register_string(ctx, rt, regnum,
1762                                               regnum_str, sizeof (regnum_str));
1763     snprintf(buf,len,"%s_%s%s", ctx->shader_type_str, regtype_str, regnum_str);
1764     return buf;
1765 } // get_GLSL_varname_in_buf
1766 
1767 
1768 static const char *get_GLSL_varname(Context *ctx, RegisterType rt, int regnum)
1769 {
1770     char buf[64];
1771     get_GLSL_varname_in_buf(ctx, rt, regnum, buf, sizeof (buf));
1772     return StrDup(ctx, buf);
1773 } // get_GLSL_varname
1774 
1775 
1776 static inline const char *get_GLSL_const_array_varname_in_buf(Context *ctx,
1777                                                 const int base, const int size,
1778                                                 char *buf, const size_t buflen)
1779 {
1780     const char *type = ctx->shader_type_str;
1781     snprintf(buf, buflen, "%s_const_array_%d_%d", type, base, size);
1782     return buf;
1783 } // get_GLSL_const_array_varname_in_buf
1784 
1785 static const char *get_GLSL_const_array_varname(Context *ctx, int base, int size)
1786 {
1787     char buf[64];
1788     get_GLSL_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
1789     return StrDup(ctx, buf);
1790 } // get_GLSL_const_array_varname
1791 
1792 
1793 static inline const char *get_GLSL_input_array_varname(Context *ctx,
1794                                                 char *buf, const size_t buflen)
1795 {
1796     snprintf(buf, buflen, "%s", "vertex_input_array");
1797     return buf;
1798 } // get_GLSL_input_array_varname
1799 
1800 
1801 static const char *get_GLSL_uniform_array_varname(Context *ctx,
1802                                                   const RegisterType regtype,
1803                                                   char *buf, const size_t len)
1804 {
1805     const char *shadertype = ctx->shader_type_str;
1806     const char *type = get_GLSL_uniform_type(ctx, regtype);
1807     snprintf(buf, len, "%s_uniforms_%s", shadertype, type);
1808     return buf;
1809 } // get_GLSL_uniform_array_varname
1810 
1811 static const char *get_GLSL_destarg_varname(Context *ctx, char *buf, size_t len)
1812 {
1813     const DestArgInfo *arg = &ctx->dest_arg;
1814     return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
1815 } // get_GLSL_destarg_varname
1816 
1817 static const char *get_GLSL_srcarg_varname(Context *ctx, const size_t idx,
1818                                            char *buf, size_t len)
1819 {
1820     if (idx >= STATICARRAYLEN(ctx->source_args))
1821     {
1822         fail(ctx, "Too many source args");
1823         *buf = '\0';
1824         return buf;
1825     } // if
1826 
1827     const SourceArgInfo *arg = &ctx->source_args[idx];
1828     return get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, len);
1829 } // get_GLSL_srcarg_varname
1830 
1831 
1832 static const char *make_GLSL_destarg_assign(Context *, char *, const size_t,
1833                                             const char *, ...) ISPRINTF(4,5);
1834 
1835 static const char *make_GLSL_destarg_assign(Context *ctx, char *buf,
1836                                             const size_t buflen,
1837                                             const char *fmt, ...)
1838 {
1839     int need_parens = 0;
1840     const DestArgInfo *arg = &ctx->dest_arg;
1841 
1842     if (arg->writemask == 0)
1843     {
1844         *buf = '\0';
1845         return buf;  // no writemask? It's a no-op.
1846     } // if
1847 
1848     char clampbuf[32] = { '\0' };
1849     const char *clampleft = "";
1850     const char *clampright = "";
1851     if (arg->result_mod & MOD_SATURATE)
1852     {
1853         const int vecsize = vecsize_from_writemask(arg->writemask);
1854         clampleft = "clamp(";
1855         if (vecsize == 1)
1856             clampright = ", 0.0, 1.0)";
1857         else
1858         {
1859             snprintf(clampbuf, sizeof (clampbuf),
1860                      ", vec%d(0.0), vec%d(1.0))", vecsize, vecsize);
1861             clampright = clampbuf;
1862         } // else
1863     } // if
1864 
1865     // MSDN says MOD_PP is a hint and many implementations ignore it. So do we.
1866 
1867     // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
1868     assert((arg->result_mod & MOD_CENTROID) == 0);
1869 
1870     if (ctx->predicated)
1871     {
1872         fail(ctx, "predicated destinations unsupported");  // !!! FIXME
1873         *buf = '\0';
1874         return buf;
1875     } // if
1876 
1877     char operation[256];
1878     va_list ap;
1879     va_start(ap, fmt);
1880     const int len = vsnprintf(operation, sizeof (operation), fmt, ap);
1881     va_end(ap);
1882     if (len >= sizeof (operation))
1883     {
1884         fail(ctx, "operation string too large");  // I'm lazy.  :P
1885         *buf = '\0';
1886         return buf;
1887     } // if
1888 
1889     const char *result_shift_str = "";
1890     switch (arg->result_shift)
1891     {
1892         case 0x1: result_shift_str = " * 2.0"; break;
1893         case 0x2: result_shift_str = " * 4.0"; break;
1894         case 0x3: result_shift_str = " * 8.0"; break;
1895         case 0xD: result_shift_str = " / 8.0"; break;
1896         case 0xE: result_shift_str = " / 4.0"; break;
1897         case 0xF: result_shift_str = " / 2.0"; break;
1898     } // switch
1899     need_parens |= (result_shift_str[0] != '\0');
1900 
1901     char regnum_str[16];
1902     const char *regtype_str = get_GLSL_register_string(ctx, arg->regtype,
1903                                                        arg->regnum, regnum_str,
1904                                                        sizeof (regnum_str));
1905     char writemask_str[6];
1906     size_t i = 0;
1907     const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
1908     if (!scalar && !writemask_xyzw(arg->writemask))
1909     {
1910         writemask_str[i++] = '.';
1911         if (arg->writemask0) writemask_str[i++] = 'x';
1912         if (arg->writemask1) writemask_str[i++] = 'y';
1913         if (arg->writemask2) writemask_str[i++] = 'z';
1914         if (arg->writemask3) writemask_str[i++] = 'w';
1915     } // if
1916     writemask_str[i] = '\0';
1917     assert(i < sizeof (writemask_str));
1918 
1919     const char *leftparen = (need_parens) ? "(" : "";
1920     const char *rightparen = (need_parens) ? ")" : "";
1921 
1922     snprintf(buf, buflen, "%s_%s%s%s = %s%s%s%s%s%s;",
1923              ctx->shader_type_str, regtype_str, regnum_str, writemask_str,
1924              clampleft, leftparen, operation, rightparen, result_shift_str,
1925              clampright);
1926     // !!! FIXME: make sure the scratch buffer was large enough.
1927     return buf;
1928 } // make_GLSL_destarg_assign
1929 
1930 
1931 static char *make_GLSL_swizzle_string(char *swiz_str, const size_t strsize,
1932                                       const int swizzle, const int writemask)
1933 {
1934     size_t i = 0;
1935     if ( (!no_swizzle(swizzle)) || (!writemask_xyzw(writemask)) )
1936     {
1937         const int writemask0 = (writemask >> 0) & 0x1;
1938         const int writemask1 = (writemask >> 1) & 0x1;
1939         const int writemask2 = (writemask >> 2) & 0x1;
1940         const int writemask3 = (writemask >> 3) & 0x1;
1941 
1942         const int swizzle_x = (swizzle >> 0) & 0x3;
1943         const int swizzle_y = (swizzle >> 2) & 0x3;
1944         const int swizzle_z = (swizzle >> 4) & 0x3;
1945         const int swizzle_w = (swizzle >> 6) & 0x3;
1946 
1947         swiz_str[i++] = '.';
1948         if (writemask0) swiz_str[i++] = swizzle_channels[swizzle_x];
1949         if (writemask1) swiz_str[i++] = swizzle_channels[swizzle_y];
1950         if (writemask2) swiz_str[i++] = swizzle_channels[swizzle_z];
1951         if (writemask3) swiz_str[i++] = swizzle_channels[swizzle_w];
1952     } // if
1953     assert(i < strsize);
1954     swiz_str[i] = '\0';
1955     return swiz_str;
1956 } // make_GLSL_swizzle_string
1957 
1958 
1959 static const char *make_GLSL_srcarg_string(Context *ctx, const size_t idx,
1960                                            const int writemask, char *buf,
1961                                            const size_t buflen)
1962 {
1963     *buf = '\0';
1964 
1965     if (idx >= STATICARRAYLEN(ctx->source_args))
1966     {
1967         fail(ctx, "Too many source args");
1968         return buf;
1969     } // if
1970 
1971     const SourceArgInfo *arg = &ctx->source_args[idx];
1972 
1973     const char *premod_str = "";
1974     const char *postmod_str = "";
1975     switch (arg->src_mod)
1976     {
1977         case SRCMOD_NEGATE:
1978             premod_str = "-";
1979             break;
1980 
1981         case SRCMOD_BIASNEGATE:
1982             premod_str = "-(";
1983             postmod_str = " - 0.5)";
1984             break;
1985 
1986         case SRCMOD_BIAS:
1987             premod_str = "(";
1988             postmod_str = " - 0.5)";
1989             break;
1990 
1991         case SRCMOD_SIGNNEGATE:
1992             premod_str = "-((";
1993             postmod_str = " - 0.5) * 2.0)";
1994             break;
1995 
1996         case SRCMOD_SIGN:
1997             premod_str = "((";
1998             postmod_str = " - 0.5) * 2.0)";
1999             break;
2000 
2001         case SRCMOD_COMPLEMENT:
2002             premod_str = "(1.0 - ";
2003             postmod_str = ")";
2004             break;
2005 
2006         case SRCMOD_X2NEGATE:
2007             premod_str = "-(";
2008             postmod_str = " * 2.0)";
2009             break;
2010 
2011         case SRCMOD_X2:
2012             premod_str = "(";
2013             postmod_str = " * 2.0)";
2014             break;
2015 
2016         case SRCMOD_DZ:
2017             fail(ctx, "SRCMOD_DZ unsupported"); return buf; // !!! FIXME
2018             postmod_str = "_dz";
2019             break;
2020 
2021         case SRCMOD_DW:
2022             fail(ctx, "SRCMOD_DW unsupported"); return buf; // !!! FIXME
2023             postmod_str = "_dw";
2024             break;
2025 
2026         case SRCMOD_ABSNEGATE:
2027             premod_str = "-abs(";
2028             postmod_str = ")";
2029             break;
2030 
2031         case SRCMOD_ABS:
2032             premod_str = "abs(";
2033             postmod_str = ")";
2034             break;
2035 
2036         case SRCMOD_NOT:
2037             premod_str = "!";
2038             break;
2039 
2040         case SRCMOD_NONE:
2041         case SRCMOD_TOTAL:
2042              break;  // stop compiler whining.
2043     } // switch
2044 
2045     const char *regtype_str = NULL;
2046 
2047     if (!arg->relative)
2048     {
2049         regtype_str = get_GLSL_varname_in_buf(ctx, arg->regtype, arg->regnum,
2050                                               (char *) alloca(64), 64);
2051     } // if
2052 
2053     const char *rel_lbracket = "";
2054     char rel_offset[32] = { '\0' };
2055     const char *rel_rbracket = "";
2056     char rel_swizzle[4] = { '\0' };
2057     const char *rel_regtype_str = "";
2058     if (arg->relative)
2059     {
2060         if (arg->regtype == REG_TYPE_INPUT)
2061             regtype_str=get_GLSL_input_array_varname(ctx,(char*)alloca(64),64);
2062         else
2063         {
2064             assert(arg->regtype == REG_TYPE_CONST);
2065             const int arrayidx = arg->relative_array->index;
2066             const int offset = arg->regnum - arrayidx;
2067             assert(offset >= 0);
2068             if (arg->relative_array->constant)
2069             {
2070                 const int arraysize = arg->relative_array->count;
2071                 regtype_str = get_GLSL_const_array_varname_in_buf(ctx,
2072                                 arrayidx, arraysize, (char *) alloca(64), 64);
2073                 if (offset != 0)
2074                     snprintf(rel_offset, sizeof (rel_offset), "%d + ", offset);
2075             } // if
2076             else
2077             {
2078                 regtype_str = get_GLSL_uniform_array_varname(ctx, arg->regtype,
2079                                                       (char *) alloca(64), 64);
2080                 if (offset == 0)
2081                 {
2082                     snprintf(rel_offset, sizeof (rel_offset),
2083                              "ARRAYBASE_%d + ", arrayidx);
2084                 } // if
2085                 else
2086                 {
2087                     snprintf(rel_offset, sizeof (rel_offset),
2088                              "(ARRAYBASE_%d + %d) + ", arrayidx, offset);
2089                 } // else
2090             } // else
2091         } // else
2092 
2093         rel_lbracket = "[";
2094 
2095         rel_regtype_str = get_GLSL_varname_in_buf(ctx, arg->relative_regtype,
2096                                                   arg->relative_regnum,
2097                                                   (char *) alloca(64), 64);
2098         rel_swizzle[0] = '.';
2099         rel_swizzle[1] = swizzle_channels[arg->relative_component];
2100         rel_swizzle[2] = '\0';
2101         rel_rbracket = "]";
2102     } // if
2103 
2104     char swiz_str[6] = { '\0' };
2105     if (!isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum))
2106     {
2107         make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
2108                                  arg->swizzle, writemask);
2109     } // if
2110 
2111     if (regtype_str == NULL)
2112     {
2113         fail(ctx, "Unknown source register type.");
2114         return buf;
2115     } // if
2116 
2117     snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s",
2118              premod_str, regtype_str, rel_lbracket, rel_offset,
2119              rel_regtype_str, rel_swizzle, rel_rbracket, swiz_str,
2120              postmod_str);
2121     // !!! FIXME: make sure the scratch buffer was large enough.
2122     return buf;
2123 } // make_GLSL_srcarg_string
2124 
// generate some convenience functions.
//  Each invocation below defines make_GLSL_srcarg_string_<mask>(), a thin
//  wrapper that calls make_GLSL_srcarg_string() with a fixed writemask:
//   x/y/z/w select a single component, scalar is the same bit as x,
//   vec2 and vec3 select the first two/three components, full selects all
//   four, and masked uses the current destination's writemask.
#define MAKE_GLSL_SRCARG_STRING_(mask, bitmask) \
    static inline const char *make_GLSL_srcarg_string_##mask(Context *ctx, \
                                                const size_t idx, char *buf, \
                                                const size_t buflen) { \
        return make_GLSL_srcarg_string(ctx, idx, bitmask, buf, buflen); \
    }
MAKE_GLSL_SRCARG_STRING_(x, (1 << 0))
MAKE_GLSL_SRCARG_STRING_(y, (1 << 1))
MAKE_GLSL_SRCARG_STRING_(z, (1 << 2))
MAKE_GLSL_SRCARG_STRING_(w, (1 << 3))
MAKE_GLSL_SRCARG_STRING_(scalar, (1 << 0))
MAKE_GLSL_SRCARG_STRING_(full, 0xF)
MAKE_GLSL_SRCARG_STRING_(masked, ctx->dest_arg.writemask)
MAKE_GLSL_SRCARG_STRING_(vec3, 0x7)
MAKE_GLSL_SRCARG_STRING_(vec2, 0x3)
#undef MAKE_GLSL_SRCARG_STRING_
2142 
2143 // special cases for comparison opcodes...
2144 
2145 static const char *get_GLSL_comparison_string_scalar(Context *ctx)
2146 {
2147     static const char *comps[] = { "", ">", "==", ">=", "<", "!=", "<=" };
2148     if (ctx->instruction_controls >= STATICARRAYLEN(comps))
2149     {
2150         fail(ctx, "unknown comparison control");
2151         return "";
2152     } // if
2153 
2154     return comps[ctx->instruction_controls];
2155 } // get_GLSL_comparison_string_scalar
2156 
2157 static const char *get_GLSL_comparison_string_vector(Context *ctx)
2158 {
2159     static const char *comps[] = {
2160         "", "greaterThan", "equal", "greaterThanEqual", "lessThan",
2161         "notEqual", "lessThanEqual"
2162     };
2163 
2164     if (ctx->instruction_controls >= STATICARRAYLEN(comps))
2165     {
2166         fail(ctx, "unknown comparison control");
2167         return "";
2168     } // if
2169 
2170     return comps[ctx->instruction_controls];
2171 } // get_GLSL_comparison_string_vector
2172 
2173 
2174 static void emit_GLSL_start(Context *ctx, const char *profilestr)
2175 {
2176     if (!shader_is_vertex(ctx) && !shader_is_pixel(ctx))
2177     {
2178         failf(ctx, "Shader type %u unsupported in this profile.",
2179               (uint) ctx->shader_type);
2180         return;
2181     } // if
2182 
2183     else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL) == 0)
2184     {
2185         // No gl_FragData[] before GLSL 1.10, so we have to force the version.
2186         push_output(ctx, &ctx->preflight);
2187         output_line(ctx, "#version 110");
2188         pop_output(ctx);
2189     } // else if
2190 
2191     #if SUPPORT_PROFILE_GLSL120
2192     else if (strcmp(profilestr, MOJOSHADER_PROFILE_GLSL120) == 0)
2193     {
2194         ctx->profile_supports_glsl120 = 1;
2195         push_output(ctx, &ctx->preflight);
2196         output_line(ctx, "#version 120");
2197         pop_output(ctx);
2198     } // else if
2199     #endif
2200 
2201     else
2202     {
2203         failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
2204         return;
2205     } // else
2206 
2207     push_output(ctx, &ctx->mainline_intro);
2208     output_line(ctx, "void main()");
2209     output_line(ctx, "{");
2210     pop_output(ctx);
2211 
2212     set_output(ctx, &ctx->mainline);
2213     ctx->indent++;
2214 } // emit_GLSL_start
2215 
2216 static void emit_GLSL_RET(Context *ctx);
2217 static void emit_GLSL_end(Context *ctx)
2218 {
2219     // ps_1_* writes color to r0 instead oC0. We move it to the right place.
2220     // We don't have to worry about a RET opcode messing this up, since
2221     //  RET isn't available before ps_2_0.
2222     if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
2223     {
2224         const char *shstr = ctx->shader_type_str;
2225         set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
2226         output_line(ctx, "%s_oC0 = %s_r0;", shstr, shstr);
2227     } // if
2228 
2229     // force a RET opcode if we're at the end of the stream without one.
2230     if (ctx->previous_opcode != OPCODE_RET)
2231         emit_GLSL_RET(ctx);
2232 } // emit_GLSL_end
2233 
2234 static void emit_GLSL_phase(Context *ctx)
2235 {
2236     // no-op in GLSL.
2237 } // emit_GLSL_phase
2238 
2239 static void output_GLSL_uniform_array(Context *ctx, const RegisterType regtype,
2240                                       const int size)
2241 {
2242     if (size > 0)
2243     {
2244         char buf[64];
2245         get_GLSL_uniform_array_varname(ctx, regtype, buf, sizeof (buf));
2246         output_line(ctx, "uniform vec4 %s[%d];", buf, size);
2247     } // if
2248 } // output_GLSL_uniform_array
2249 
2250 static void emit_GLSL_finalize(Context *ctx)
2251 {
2252     // throw some blank lines around to make source more readable.
2253     push_output(ctx, &ctx->globals);
2254     output_blank_line(ctx);
2255     pop_output(ctx);
2256 
2257     // If we had a relative addressing of REG_TYPE_INPUT, we need to build
2258     //  an array for it at the start of main(). GLSL doesn't let you specify
2259     //  arrays of attributes.
2260     //vec4 blah_array[BIGGEST_ARRAY];
2261     if (ctx->have_relative_input_registers) // !!! FIXME
2262         fail(ctx, "Relative addressing of input registers not supported.");
2263 
2264     push_output(ctx, &ctx->preflight);
2265     output_GLSL_uniform_array(ctx, REG_TYPE_CONST, ctx->uniform_float4_count);
2266     output_GLSL_uniform_array(ctx, REG_TYPE_CONSTINT, ctx->uniform_int4_count);
2267     output_GLSL_uniform_array(ctx, REG_TYPE_CONSTBOOL, ctx->uniform_bool_count);
2268     pop_output(ctx);
2269 } // emit_GLSL_finalize
2270 
2271 static void emit_GLSL_global(Context *ctx, RegisterType regtype, int regnum)
2272 {
2273     char varname[64];
2274     get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
2275 
2276     push_output(ctx, &ctx->globals);
2277     switch (regtype)
2278     {
2279         case REG_TYPE_ADDRESS:
2280             if (shader_is_vertex(ctx))
2281                 output_line(ctx, "ivec4 %s;", varname);
2282             else if (shader_is_pixel(ctx))  // actually REG_TYPE_TEXTURE.
2283             {
2284                 // We have to map texture registers to temps for ps_1_1, since
2285                 //  they work like temps, initialize with tex coords, and the
2286                 //  ps_1_1 TEX opcode expects to overwrite it.
2287                 if (!shader_version_atleast(ctx, 1, 4))
2288                 {
2289                     output_line(ctx, "vec4 %s = gl_TexCoord[%d];",
2290                                 varname, regnum);
2291                 } // if
2292             } // else if
2293             break;
2294         case REG_TYPE_PREDICATE:
2295             output_line(ctx, "bvec4 %s;", varname);
2296             break;
2297         case REG_TYPE_TEMP:
2298             output_line(ctx, "vec4 %s;", varname);
2299             break;
2300         case REG_TYPE_LOOP:
2301             break; // no-op. We declare these in for loops at the moment.
2302         case REG_TYPE_LABEL:
2303             break; // no-op. If we see it here, it means we optimized it out.
2304         default:
2305             fail(ctx, "BUG: we used a register we don't know how to define.");
2306             break;
2307     } // switch
2308     pop_output(ctx);
2309 } // emit_GLSL_global
2310 
2311 static void emit_GLSL_array(Context *ctx, VariableList *var)
2312 {
2313     // All uniforms (except constant arrays, which only get pushed once at
2314     //  compile time) are now packed into a single array, so we can batch
2315     //  the uniform transfers. So this is doesn't actually define an array
2316     //  here; the one, big array is emitted during finalization instead.
2317     // However, we need to #define the offset into the one, big array here,
2318     //  and let dereferences use that #define.
2319     const int base = var->index;
2320     const int glslbase = ctx->uniform_float4_count;
2321     push_output(ctx, &ctx->globals);
2322     output_line(ctx, "#define ARRAYBASE_%d %d", base, glslbase);
2323     pop_output(ctx);
2324     var->emit_position = glslbase;
2325 } // emit_GLSL_array
2326 
// Declare a constant array of `size` vec4 elements starting at register
//  `base`. With the GLSL 1.20 path below compiled out, this always emits a
//  "uniform vec4 <name>[<size>];" declaration into the globals section, and
//  `clist` goes unused.
static void emit_GLSL_const_array(Context *ctx, const ConstantsList *clist,
                                  int base, int size)
{
    char varname[64];
    get_GLSL_const_array_varname_in_buf(ctx,base,size,varname,sizeof(varname));

    // The disabled block below ends in a dangling "else" that would attach
    //  to the live block after the #endif if it were ever re-enabled.
#if 0
    // !!! FIXME: fails on Nvidia's and Apple's GL, even with #version 120.
    // !!! FIXME:  (the 1.20 spec says it should work, though, I think...)
    if (support_glsl120(ctx))
    {
        // GLSL 1.20 can do constant arrays.
        const char *cstr = NULL;
        push_output(ctx, &ctx->globals);
        output_line(ctx, "const vec4 %s[%d] = vec4[%d](", varname, size, size);
        ctx->indent++;

        int i;
        for (i = 0; i < size; i++)
        {
            // skip list entries that aren't float constants.
            while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
                clist = clist->next;
            assert(clist->constant.index == (base + i));

            char val0[32];
            char val1[32];
            char val2[32];
            char val3[32];
            floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
            floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
            floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
            floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);

            // every element but the last is followed by a comma.
            output_line(ctx, "vec4(%s, %s, %s, %s)%s", val0, val1, val2, val3,
                        (i < (size-1)) ? "," : "");

            clist = clist->next;
        } // for

        ctx->indent--;
        output_line(ctx, ");");
        pop_output(ctx);
    } // if

    else
#endif
    {
        // stock GLSL 1.0 can't do constant arrays, so make a uniform array
        //  and have the OpenGL glue assign it at link time. Lame!
        push_output(ctx, &ctx->globals);
        output_line(ctx, "uniform vec4 %s[%d];", varname, size);
        pop_output(ctx);
    } // else
} // emit_GLSL_const_array
2381 
2382 static void emit_GLSL_uniform(Context *ctx, RegisterType regtype, int regnum,
2383                               const VariableList *var)
2384 {
2385     // Now that we're pushing all the uniforms as one big array, pack these
2386     //  down, so if we only use register c439, it'll actually map to
2387     //  glsl_uniforms_vec4[0]. As we push one big array, this will prevent
2388     //  uploading unused data.
2389 
2390     char varname[64];
2391     char name[64];
2392     int index = 0;
2393 
2394     get_GLSL_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
2395 
2396     push_output(ctx, &ctx->globals);
2397 
2398     if (var == NULL)
2399     {
2400         get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
2401 
2402         if (regtype == REG_TYPE_CONST)
2403             index = ctx->uniform_float4_count;
2404         else if (regtype == REG_TYPE_CONSTINT)
2405             index = ctx->uniform_int4_count;
2406         else if (regtype == REG_TYPE_CONSTBOOL)
2407             index = ctx->uniform_bool_count;
2408         else  // get_GLSL_uniform_array_varname() would have called fail().
2409             assert(isfail(ctx));
2410 
2411         output_line(ctx, "#define %s %s[%d]", varname, name, index);
2412     } // if
2413 
2414     else
2415     {
2416         const int arraybase = var->index;
2417         if (var->constant)
2418         {
2419             get_GLSL_const_array_varname_in_buf(ctx, arraybase, var->count,
2420                                                 name, sizeof (name));
2421             index = (regnum - arraybase);
2422         } // if
2423         else
2424         {
2425             assert(var->emit_position != -1);
2426             get_GLSL_uniform_array_varname(ctx, regtype, name, sizeof (name));
2427             index = (regnum - arraybase) + var->emit_position;
2428         } // else
2429 
2430         output_line(ctx, "#define %s %s[%d]", varname, name, index);
2431     } // else
2432 
2433     pop_output(ctx);
2434 } // emit_GLSL_uniform
2435 
2436 static void emit_GLSL_sampler(Context *ctx,int stage,TextureType ttype,int tb)
2437 {
2438     const char *type = "";
2439     switch (ttype)
2440     {
2441         case TEXTURE_TYPE_2D: type = "sampler2D"; break;
2442         case TEXTURE_TYPE_CUBE: type = "samplerCube"; break;
2443         case TEXTURE_TYPE_VOLUME: type = "sampler3D"; break;
2444         default: fail(ctx, "BUG: used a sampler we don't know how to define.");
2445     } // switch
2446 
2447     char var[64];
2448     get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof (var));
2449 
2450     push_output(ctx, &ctx->globals);
2451     output_line(ctx, "uniform %s %s;", type, var);
2452     if (tb)  // This sampler used a ps_1_1 TEXBEM opcode?
2453     {
2454         char name[64];
2455         const int index = ctx->uniform_float4_count;
2456         ctx->uniform_float4_count += 2;
2457         get_GLSL_uniform_array_varname(ctx, REG_TYPE_CONST, name, sizeof (name));
2458         output_line(ctx, "#define %s_texbem %s[%d]", var, name, index);
2459         output_line(ctx, "#define %s_texbeml %s[%d]", var, name, index+1);
2460     } // if
2461     pop_output(ctx);
2462 } // emit_GLSL_sampler
2463 
2464 static void emit_GLSL_attribute(Context *ctx, RegisterType regtype, int regnum,
2465                                 MOJOSHADER_usage usage, int index, int wmask,
2466                                 int flags)
2467 {
2468     // !!! FIXME: this function doesn't deal with write masks at all yet!
2469     const char *usage_str = NULL;
2470     const char *arrayleft = "";
2471     const char *arrayright = "";
2472     char index_str[16] = { '\0' };
2473     char var[64];
2474 
2475     get_GLSL_varname_in_buf(ctx, regtype, regnum, var, sizeof (var));
2476 
2477     //assert((flags & MOD_PP) == 0);  // !!! FIXME: is PP allowed?
2478 
2479     if (index != 0)  // !!! FIXME: a lot of these MUST be zero.
2480         snprintf(index_str, sizeof (index_str), "%u", (uint) index);
2481 
2482     if (shader_is_vertex(ctx))
2483     {
2484         // pre-vs3 output registers.
2485         // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
2486         //  output registers.
2487         if (!shader_version_atleast(ctx, 3, 0))
2488         {
2489             if (regtype == REG_TYPE_RASTOUT)
2490             {
2491                 regtype = REG_TYPE_OUTPUT;
2492                 index = regnum;
2493                 switch ((const RastOutType) regnum)
2494                 {
2495                     case RASTOUT_TYPE_POSITION:
2496                         usage = MOJOSHADER_USAGE_POSITION;
2497                         break;
2498                     case RASTOUT_TYPE_FOG:
2499                         usage = MOJOSHADER_USAGE_FOG;
2500                         break;
2501                     case RASTOUT_TYPE_POINT_SIZE:
2502                         usage = MOJOSHADER_USAGE_POINTSIZE;
2503                         break;
2504                 } // switch
2505             } // if
2506 
2507             else if (regtype == REG_TYPE_ATTROUT)
2508             {
2509                 regtype = REG_TYPE_OUTPUT;
2510                 usage = MOJOSHADER_USAGE_COLOR;
2511                 index = regnum;
2512             } // else if
2513 
2514             else if (regtype == REG_TYPE_TEXCRDOUT)
2515             {
2516                 regtype = REG_TYPE_OUTPUT;
2517                 usage = MOJOSHADER_USAGE_TEXCOORD;
2518                 index = regnum;
2519             } // else if
2520         } // if
2521 
2522         // to avoid limitations of various GL entry points for input
2523         // attributes (glSecondaryColorPointer() can only take 3 component
2524         // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
2525         // issues), we set up all inputs as generic vertex attributes, so we
2526         // can pass data in just about any form, and ignore the built-in GLSL
2527         // attributes like gl_SecondaryColor. Output needs to use the the
2528         // built-ins, though, but we don't have to worry about the GL entry
2529         // point limitations there.
2530 
2531         if (regtype == REG_TYPE_INPUT)
2532         {
2533             push_output(ctx, &ctx->globals);
2534             output_line(ctx, "attribute vec4 %s;", var);
2535             pop_output(ctx);
2536         } // if
2537 
2538         else if (regtype == REG_TYPE_OUTPUT)
2539         {
2540             switch (usage)
2541             {
2542                 case MOJOSHADER_USAGE_POSITION:
2543                     usage_str = "gl_Position";
2544                     break;
2545                 case MOJOSHADER_USAGE_POINTSIZE:
2546                     usage_str = "gl_PointSize";
2547                     break;
2548                 case MOJOSHADER_USAGE_COLOR:
2549                     index_str[0] = '\0';  // no explicit number.
2550                     if (index == 0)
2551                         usage_str = "gl_FrontColor";
2552                     else if (index == 1)
2553                         usage_str = "gl_FrontSecondaryColor";
2554                     break;
2555                 case MOJOSHADER_USAGE_FOG:
2556                     usage_str = "gl_FogFragCoord";
2557                     break;
2558                 case MOJOSHADER_USAGE_TEXCOORD:
2559                     snprintf(index_str, sizeof (index_str), "%u", (uint) index);
2560                     usage_str = "gl_TexCoord";
2561                     arrayleft = "[";
2562                     arrayright = "]";
2563                     break;
2564                 default:
2565                     // !!! FIXME: we need to deal with some more built-in varyings here.
2566                     break;
2567             } // switch
2568 
2569             // !!! FIXME: the #define is a little hacky, but it means we don't
2570             // !!! FIXME:  have to track these separately if this works.
2571             push_output(ctx, &ctx->globals);
2572             // no mapping to built-in var? Just make it a regular global, pray.
2573             if (usage_str == NULL)
2574                 output_line(ctx, "vec4 %s;", var);
2575             else
2576             {
2577                 output_line(ctx, "#define %s %s%s%s%s", var, usage_str,
2578                             arrayleft, index_str, arrayright);
2579             } // else
2580             pop_output(ctx);
2581         } // else if
2582 
2583         else
2584         {
2585             fail(ctx, "unknown vertex shader attribute register");
2586         } // else
2587     } // if
2588 
2589     else if (shader_is_pixel(ctx))
2590     {
2591         // samplers DCLs get handled in emit_GLSL_sampler().
2592 
2593         if (flags & MOD_CENTROID)  // !!! FIXME
2594         {
2595             failf(ctx, "centroid unsupported in %s profile", ctx->profile->name);
2596             return;
2597         } // if
2598 
2599         if (regtype == REG_TYPE_COLOROUT)
2600         {
2601             if (!ctx->have_multi_color_outputs)
2602                 usage_str = "gl_FragColor";  // maybe faster?
2603             else
2604             {
2605                 snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
2606                 usage_str = "gl_FragData";
2607                 arrayleft = "[";
2608                 arrayright = "]";
2609             } // else
2610         } // if
2611 
2612         else if (regtype == REG_TYPE_DEPTHOUT)
2613             usage_str = "gl_FragDepth";
2614 
2615         // !!! FIXME: can you actualy have a texture register with COLOR usage?
2616         else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
2617         {
2618             if (usage == MOJOSHADER_USAGE_TEXCOORD)
2619             {
2620                 // ps_1_1 does a different hack for this attribute.
2621                 //  Refer to emit_GLSL_global()'s REG_TYPE_TEXTURE code.
2622                 if (shader_version_atleast(ctx, 1, 4))
2623                 {
2624                     snprintf(index_str, sizeof (index_str), "%u", (uint) index);
2625                     usage_str = "gl_TexCoord";
2626                     arrayleft = "[";
2627                     arrayright = "]";
2628                 } // if
2629             } // if
2630 
2631             else if (usage == MOJOSHADER_USAGE_COLOR)
2632             {
2633                 index_str[0] = '\0';  // no explicit number.
2634                 if (index == 0)
2635                     usage_str = "gl_Color";
2636                 else if (index == 1)
2637                     usage_str = "gl_SecondaryColor";
2638                 else
2639                     fail(ctx, "unsupported color index");
2640             } // else if
2641         } // else if
2642 
2643         else if (regtype == REG_TYPE_MISCTYPE)
2644         {
2645             const MiscTypeType mt = (MiscTypeType) regnum;
2646             if (mt == MISCTYPE_TYPE_FACE)
2647             {
2648                 push_output(ctx, &ctx->globals);
2649                 output_line(ctx, "float %s = gl_FrontFacing ? 1.0 : -1.0;", var);
2650                 pop_output(ctx);
2651             } // if
2652             else if (mt == MISCTYPE_TYPE_POSITION)
2653             {
2654                 index_str[0] = '\0';  // no explicit number.
2655                 usage_str = "gl_FragCoord";  // !!! FIXME: is this the same coord space as D3D?
2656             } // else if
2657             else
2658             {
2659                 fail(ctx, "BUG: unhandled misc register");
2660             } // else
2661         } // else if
2662 
2663         else
2664         {
2665             fail(ctx, "unknown pixel shader attribute register");
2666         } // else
2667 
2668         if (usage_str != NULL)
2669         {
2670             push_output(ctx, &ctx->globals);
2671             output_line(ctx, "#define %s %s%s%s%s", var, usage_str,
2672                         arrayleft, index_str, arrayright);
2673             pop_output(ctx);
2674         } // if
2675     } // else if
2676 
2677     else
2678     {
2679         fail(ctx, "Unknown shader type");  // state machine should catch this.
2680     } // else
2681 } // emit_GLSL_attribute
2682 
2683 static void emit_GLSL_NOP(Context *ctx)
2684 {
2685     // no-op is a no-op.  :)
2686 } // emit_GLSL_NOP
2687 
2688 static void emit_GLSL_MOV(Context *ctx)
2689 {
2690     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2691     char code[128];
2692     make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s", src0);
2693     output_line(ctx, "%s", code);
2694 } // emit_GLSL_MOV
2695 
2696 static void emit_GLSL_ADD(Context *ctx)
2697 {
2698     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2699     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2700     char code[128];
2701     make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s + %s", src0, src1);
2702     output_line(ctx, "%s", code);
2703 } // emit_GLSL_ADD
2704 
2705 static void emit_GLSL_SUB(Context *ctx)
2706 {
2707     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2708     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2709     char code[128];
2710     make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s - %s", src0, src1);
2711     output_line(ctx, "%s", code);
2712 } // emit_GLSL_SUB
2713 
2714 static void emit_GLSL_MAD(Context *ctx)
2715 {
2716     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2717     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2718     char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
2719     char code[128];
2720     make_GLSL_destarg_assign(ctx, code, sizeof (code), "(%s * %s) + %s", src0, src1, src2);
2721     output_line(ctx, "%s", code);
2722 } // emit_GLSL_MAD
2723 
2724 static void emit_GLSL_MUL(Context *ctx)
2725 {
2726     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2727     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2728     char code[128];
2729     make_GLSL_destarg_assign(ctx, code, sizeof (code), "%s * %s", src0, src1);
2730     output_line(ctx, "%s", code);
2731 } // emit_GLSL_MUL
2732 
2733 static void emit_GLSL_RCP(Context *ctx)
2734 {
2735     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2736     char code[128];
2737     make_GLSL_destarg_assign(ctx, code, sizeof (code), "1.0 / %s", src0);
2738     output_line(ctx, "%s", code);
2739 } // emit_GLSL_RCP
2740 
2741 static void emit_GLSL_RSQ(Context *ctx)
2742 {
2743     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2744     char code[128];
2745     make_GLSL_destarg_assign(ctx, code, sizeof (code), "inversesqrt(%s)", src0);
2746     output_line(ctx, "%s", code);
2747 } // emit_GLSL_RSQ
2748 
2749 static void emit_GLSL_dotprod(Context *ctx, const char *src0, const char *src1,
2750                               const char *extra)
2751 {
2752     const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
2753     char castleft[16] = { '\0' };
2754     const char *castright = "";
2755     if (vecsize != 1)
2756     {
2757         snprintf(castleft, sizeof (castleft), "vec%d(", vecsize);
2758         castright = ")";
2759     } // if
2760 
2761     char code[128];
2762     make_GLSL_destarg_assign(ctx, code, sizeof (code), "%sdot(%s, %s)%s%s",
2763                              castleft, src0, src1, extra, castright);
2764     output_line(ctx, "%s", code);
2765 } // emit_GLSL_dotprod
2766 
2767 static void emit_GLSL_DP3(Context *ctx)
2768 {
2769     char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
2770     char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
2771     emit_GLSL_dotprod(ctx, src0, src1, "");
2772 } // emit_GLSL_DP3
2773 
2774 static void emit_GLSL_DP4(Context *ctx)
2775 {
2776     char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
2777     char src1[64]; make_GLSL_srcarg_string_full(ctx, 1, src1, sizeof (src1));
2778     emit_GLSL_dotprod(ctx, src0, src1, "");
2779 } // emit_GLSL_DP4
2780 
2781 static void emit_GLSL_MIN(Context *ctx)
2782 {
2783     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2784     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2785     char code[128];
2786     make_GLSL_destarg_assign(ctx, code, sizeof (code), "min(%s, %s)", src0, src1);
2787     output_line(ctx, "%s", code);
2788 } // emit_GLSL_MIN
2789 
2790 static void emit_GLSL_MAX(Context *ctx)
2791 {
2792     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2793     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2794     char code[128];
2795     make_GLSL_destarg_assign(ctx, code, sizeof (code), "max(%s, %s)", src0, src1);
2796     output_line(ctx, "%s", code);
2797 } // emit_GLSL_MAX
2798 
2799 static void emit_GLSL_SLT(Context *ctx)
2800 {
2801     const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
2802     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2803     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2804     char code[128];
2805 
2806     // float(bool) or vec(bvec) results in 0.0 or 1.0, like SLT wants.
2807     if (vecsize == 1)
2808         make_GLSL_destarg_assign(ctx, code, sizeof (code), "float(%s < %s)", src0, src1);
2809     else
2810     {
2811         make_GLSL_destarg_assign(ctx, code, sizeof (code),
2812                                  "vec%d(lessThan(%s, %s))",
2813                                  vecsize, src0, src1);
2814     } // else
2815     output_line(ctx, "%s", code);
2816 } // emit_GLSL_SLT
2817 
2818 static void emit_GLSL_SGE(Context *ctx)
2819 {
2820     const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
2821     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2822     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2823     char code[128];
2824 
2825     // float(bool) or vec(bvec) results in 0.0 or 1.0, like SGE wants.
2826     if (vecsize == 1)
2827     {
2828         make_GLSL_destarg_assign(ctx, code, sizeof (code),
2829                                  "float(%s >= %s)", src0, src1);
2830     } // if
2831     else
2832     {
2833         make_GLSL_destarg_assign(ctx, code, sizeof (code),
2834                                  "vec%d(greaterThanEqual(%s, %s))",
2835                                  vecsize, src0, src1);
2836     } // else
2837     output_line(ctx, "%s", code);
2838 } // emit_GLSL_SGE
2839 
2840 static void emit_GLSL_EXP(Context *ctx)
2841 {
2842     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2843     char code[128];
2844     make_GLSL_destarg_assign(ctx, code, sizeof (code), "exp2(%s)", src0);
2845     output_line(ctx, "%s", code);
2846 } // emit_GLSL_EXP
2847 
2848 static void emit_GLSL_LOG(Context *ctx)
2849 {
2850     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2851     char code[128];
2852     make_GLSL_destarg_assign(ctx, code, sizeof (code), "log2(%s)", src0);
2853     output_line(ctx, "%s", code);
2854 } // emit_GLSL_LOG
2855 
2856 static void emit_GLSL_LIT_helper(Context *ctx)
2857 {
2858     const char *maxp = "127.9961"; // value from the dx9 reference.
2859 
2860     if (ctx->glsl_generated_lit_helper)
2861         return;
2862 
2863     ctx->glsl_generated_lit_helper = 1;
2864 
2865     push_output(ctx, &ctx->helpers);
2866     output_line(ctx, "vec4 LIT(const vec4 src)");
2867     output_line(ctx, "{"); ctx->indent++;
2868     output_line(ctx,   "float power = clamp(src.w, -%s, %s);",maxp,maxp);
2869     output_line(ctx,   "vec4 retval = vec4(1.0, 0.0, 0.0, 1.0);");
2870     output_line(ctx,   "if (src.x > 0.0) {"); ctx->indent++;
2871     output_line(ctx,     "retval.y = src.x;");
2872     output_line(ctx,     "if (src.y > 0.0) {"); ctx->indent++;
2873     output_line(ctx,       "retval.z = pow(src.y, power);"); ctx->indent--;
2874     output_line(ctx,     "}"); ctx->indent--;
2875     output_line(ctx,   "}");
2876     output_line(ctx,   "return retval;"); ctx->indent--;
2877     output_line(ctx, "}");
2878     output_blank_line(ctx);
2879     pop_output(ctx);
2880 } // emit_GLSL_LIT_helper
2881 
2882 static void emit_GLSL_LIT(Context *ctx)
2883 {
2884     char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
2885     char code[128];
2886     emit_GLSL_LIT_helper(ctx);
2887     make_GLSL_destarg_assign(ctx, code, sizeof (code), "LIT(%s)", src0);
2888     output_line(ctx, "%s", code);
2889 } // emit_GLSL_LIT
2890 
2891 static void emit_GLSL_DST(Context *ctx)
2892 {
2893     // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
2894     char src0_y[64]; make_GLSL_srcarg_string_y(ctx, 0, src0_y, sizeof (src0_y));
2895     char src1_y[64]; make_GLSL_srcarg_string_y(ctx, 1, src1_y, sizeof (src1_y));
2896     char src0_z[64]; make_GLSL_srcarg_string_z(ctx, 0, src0_z, sizeof (src0_z));
2897     char src1_w[64]; make_GLSL_srcarg_string_w(ctx, 1, src1_w, sizeof (src1_w));
2898 
2899     char code[128];
2900     make_GLSL_destarg_assign(ctx, code, sizeof (code),
2901                              "vec4(1.0, %s * %s, %s, %s)",
2902                              src0_y, src1_y, src0_z, src1_w);
2903     output_line(ctx, "%s", code);
2904 } // emit_GLSL_DST
2905 
2906 static void emit_GLSL_LRP(Context *ctx)
2907 {
2908     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2909     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
2910     char src2[64]; make_GLSL_srcarg_string_masked(ctx, 2, src2, sizeof (src2));
2911     char code[128];
2912     make_GLSL_destarg_assign(ctx, code, sizeof (code), "mix(%s, %s, %s)",
2913                              src2, src1, src0);
2914     output_line(ctx, "%s", code);
2915 } // emit_GLSL_LRP
2916 
2917 static void emit_GLSL_FRC(Context *ctx)
2918 {
2919     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2920     char code[128];
2921     make_GLSL_destarg_assign(ctx, code, sizeof (code), "fract(%s)", src0);
2922     output_line(ctx, "%s", code);
2923 } // emit_GLSL_FRC
2924 
2925 static void emit_GLSL_M4X4(Context *ctx)
2926 {
2927     char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
2928     char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
2929     char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
2930     char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
2931     char row3[64]; make_GLSL_srcarg_string_full(ctx, 4, row3, sizeof (row3));
2932     char code[256];
2933     make_GLSL_destarg_assign(ctx, code, sizeof (code),
2934                     "vec4(dot(%s, %s), dot(%s, %s), dot(%s, %s), dot(%s, %s))",
2935                     src0, row0, src0, row1, src0, row2, src0, row3);
2936     output_line(ctx, "%s", code);
2937 } // emit_GLSL_M4X4
2938 
2939 static void emit_GLSL_M4X3(Context *ctx)
2940 {
2941     char src0[64]; make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
2942     char row0[64]; make_GLSL_srcarg_string_full(ctx, 1, row0, sizeof (row0));
2943     char row1[64]; make_GLSL_srcarg_string_full(ctx, 2, row1, sizeof (row1));
2944     char row2[64]; make_GLSL_srcarg_string_full(ctx, 3, row2, sizeof (row2));
2945     char code[256];
2946     make_GLSL_destarg_assign(ctx, code, sizeof (code),
2947                                 "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
2948                                 src0, row0, src0, row1, src0, row2);
2949     output_line(ctx, "%s", code);
2950 } // emit_GLSL_M4X3
2951 
2952 static void emit_GLSL_M3X4(Context *ctx)
2953 {
2954     char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
2955     char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
2956     char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
2957     char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
2958     char row3[64]; make_GLSL_srcarg_string_vec3(ctx, 4, row3, sizeof (row3));
2959 
2960     char code[256];
2961     make_GLSL_destarg_assign(ctx, code, sizeof (code),
2962                                 "vec4(dot(%s, %s), dot(%s, %s), "
2963                                      "dot(%s, %s), dot(%s, %s))",
2964                                 src0, row0, src0, row1,
2965                                 src0, row2, src0, row3);
2966     output_line(ctx, "%s", code);
2967 } // emit_GLSL_M3X4
2968 
2969 static void emit_GLSL_M3X3(Context *ctx)
2970 {
2971     char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
2972     char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
2973     char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
2974     char row2[64]; make_GLSL_srcarg_string_vec3(ctx, 3, row2, sizeof (row2));
2975     char code[256];
2976     make_GLSL_destarg_assign(ctx, code, sizeof (code),
2977                                 "vec3(dot(%s, %s), dot(%s, %s), dot(%s, %s))",
2978                                 src0, row0, src0, row1, src0, row2);
2979     output_line(ctx, "%s", code);
2980 } // emit_GLSL_M3X3
2981 
2982 static void emit_GLSL_M3X2(Context *ctx)
2983 {
2984     char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
2985     char row0[64]; make_GLSL_srcarg_string_vec3(ctx, 1, row0, sizeof (row0));
2986     char row1[64]; make_GLSL_srcarg_string_vec3(ctx, 2, row1, sizeof (row1));
2987 
2988     char code[256];
2989     make_GLSL_destarg_assign(ctx, code, sizeof (code),
2990                                 "vec2(dot(%s, %s), dot(%s, %s))",
2991                                 src0, row0, src0, row1);
2992     output_line(ctx, "%s", code);
2993 } // emit_GLSL_M3X2
2994 
2995 static void emit_GLSL_CALL(Context *ctx)
2996 {
2997     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
2998     if (ctx->loops > 0)
2999         output_line(ctx, "%s(aL);", src0);
3000     else
3001         output_line(ctx, "%s();", src0);
3002 } // emit_GLSL_CALL
3003 
3004 static void emit_GLSL_CALLNZ(Context *ctx)
3005 {
3006     // !!! FIXME: if src1 is a constbool that's true, we can remove the
3007     // !!! FIXME:  if. If it's false, we can make this a no-op.
3008     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3009     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
3010 
3011     if (ctx->loops > 0)
3012         output_line(ctx, "if (%s) { %s(aL); }", src1, src0);
3013     else
3014         output_line(ctx, "if (%s) { %s(); }", src1, src0);
3015 } // emit_GLSL_CALLNZ
3016 
3017 static void emit_GLSL_LOOP(Context *ctx)
3018 {
3019     // !!! FIXME: swizzle?
3020     char var[64]; get_GLSL_srcarg_varname(ctx, 1, var, sizeof (var));
3021     assert(ctx->source_args[0].regnum == 0);  // in case they add aL1 someday.
3022     output_line(ctx, "{");
3023     ctx->indent++;
3024     output_line(ctx, "const int aLend = %s.x + %s.y;", var, var);
3025     output_line(ctx, "for (int aL = %s.y; aL < aLend; aL += %s.z) {", var, var);
3026     ctx->indent++;
3027 } // emit_GLSL_LOOP
3028 
3029 static void emit_GLSL_RET(Context *ctx)
3030 {
3031     // thankfully, the MSDN specs say a RET _has_ to end a function...no
3032     //  early returns. So if you hit one, you know you can safely close
3033     //  a high-level function.
3034     ctx->indent--;
3035     output_line(ctx, "}");
3036     output_blank_line(ctx);
3037     set_output(ctx, &ctx->subroutines);
3038 } // emit_GLSL_RET
3039 
3040 static void emit_GLSL_ENDLOOP(Context *ctx)
3041 {
3042     ctx->indent--;
3043     output_line(ctx, "}");
3044     ctx->indent--;
3045     output_line(ctx, "}");
3046 } // emit_GLSL_ENDLOOP
3047 
3048 static void emit_GLSL_LABEL(Context *ctx)
3049 {
3050     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3051     const int label = ctx->source_args[0].regnum;
3052     RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);
3053     assert(ctx->output == ctx->subroutines);  // not mainline, etc.
3054     assert(ctx->indent == 0);  // we shouldn't be in the middle of a function.
3055 
3056     // MSDN specs say CALL* has to come before the LABEL, so we know if we
3057     //  can ditch the entire function here as unused.
3058     if (reg == NULL)
3059         set_output(ctx, &ctx->ignore);  // Func not used. Parse, but don't output.
3060 
3061     // !!! FIXME: it would be nice if we could determine if a function is
3062     // !!! FIXME:  only called once and, if so, forcibly inline it.
3063 
3064     const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
3065     output_line(ctx, "void %s(%s)", src0, uses_loopreg);
3066     output_line(ctx, "{");
3067     ctx->indent++;
3068 } // emit_GLSL_LABEL
3069 
3070 static void emit_GLSL_DCL(Context *ctx)
3071 {
3072     // no-op. We do this in our emit_attribute() and emit_uniform().
3073 } // emit_GLSL_DCL
3074 
3075 static void emit_GLSL_POW(Context *ctx)
3076 {
3077     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3078     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
3079     char code[128];
3080     make_GLSL_destarg_assign(ctx, code, sizeof (code),
3081                              "pow(abs(%s), %s)", src0, src1);
3082     output_line(ctx, "%s", code);
3083 } // emit_GLSL_POW
3084 
3085 static void emit_GLSL_CRS(Context *ctx)
3086 {
3087     // !!! FIXME: needs to take ctx->dst_arg.writemask into account.
3088     char src0[64]; make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
3089     char src1[64]; make_GLSL_srcarg_string_vec3(ctx, 1, src1, sizeof (src1));
3090     char code[128];
3091     make_GLSL_destarg_assign(ctx, code, sizeof (code),
3092                              "cross(%s, %s)", src0, src1);
3093     output_line(ctx, "%s", code);
3094 } // emit_GLSL_CRS
3095 
3096 static void emit_GLSL_SGN(Context *ctx)
3097 {
3098     // (we don't need the temporary registers specified for the D3D opcode.)
3099     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3100     char code[128];
3101     make_GLSL_destarg_assign(ctx, code, sizeof (code), "sign(%s)", src0);
3102     output_line(ctx, "%s", code);
3103 } // emit_GLSL_SGN
3104 
3105 static void emit_GLSL_ABS(Context *ctx)
3106 {
3107     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3108     char code[128];
3109     make_GLSL_destarg_assign(ctx, code, sizeof (code), "abs(%s)", src0);
3110     output_line(ctx, "%s", code);
3111 } // emit_GLSL_ABS
3112 
3113 static void emit_GLSL_NRM(Context *ctx)
3114 {
3115     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3116     char code[128];
3117     make_GLSL_destarg_assign(ctx, code, sizeof (code), "normalize(%s)", src0);
3118     output_line(ctx, "%s", code);
3119 } // emit_GLSL_NRM
3120 
3121 static void emit_GLSL_SINCOS(Context *ctx)
3122 {
3123     // we don't care about the temp registers that <= sm2 demands; ignore them.
3124     //  sm2 also talks about what components are left untouched vs. undefined,
3125     //  but we just leave those all untouched with GLSL write masks (which
3126     //  would fulfill the "undefined" requirement, too).
3127     const int mask = ctx->dest_arg.writemask;
3128     char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3129     char code[128] = { '\0' };
3130 
3131     if (writemask_x(mask))
3132         make_GLSL_destarg_assign(ctx, code, sizeof (code), "cos(%s)", src0);
3133     else if (writemask_y(mask))
3134         make_GLSL_destarg_assign(ctx, code, sizeof (code), "sin(%s)", src0);
3135     else if (writemask_xy(mask))
3136     {
3137         make_GLSL_destarg_assign(ctx, code, sizeof (code),
3138                                  "vec2(cos(%s), sin(%s))", src0, src0);
3139     } // else if
3140 
3141     output_line(ctx, "%s", code);
3142 } // emit_GLSL_SINCOS
3143 
3144 static void emit_GLSL_REP(Context *ctx)
3145 {
3146     // !!! FIXME:
3147     // msdn docs say legal loop values are 0 to 255. We can check DEFI values
3148     //  at parse time, but if they are pulling a value from a uniform, do
3149     //  we clamp here?
3150     // !!! FIXME: swizzle is legal here, right?
3151     char src0[64]; make_GLSL_srcarg_string_x(ctx, 0, src0, sizeof (src0));
3152     const uint rep = (uint) ctx->reps;
3153     output_line(ctx, "for (int rep%u = 0; rep%u < %s; rep%u++) {",
3154                 rep, rep, src0, rep);
3155     ctx->indent++;
3156 } // emit_GLSL_REP
3157 
3158 static void emit_GLSL_ENDREP(Context *ctx)
3159 {
3160     ctx->indent--;
3161     output_line(ctx, "}");
3162 } // emit_GLSL_ENDREP
3163 
3164 static void emit_GLSL_IF(Context *ctx)
3165 {
3166     char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3167     output_line(ctx, "if (%s) {", src0);
3168     ctx->indent++;
3169 } // emit_GLSL_IF
3170 
3171 static void emit_GLSL_IFC(Context *ctx)
3172 {
3173     const char *comp = get_GLSL_comparison_string_scalar(ctx);
3174     char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3175     char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
3176     output_line(ctx, "if (%s %s %s) {", src0, comp, src1);
3177     ctx->indent++;
3178 } // emit_GLSL_IFC
3179 
3180 static void emit_GLSL_ELSE(Context *ctx)
3181 {
3182     ctx->indent--;
3183     output_line(ctx, "} else {");
3184     ctx->indent++;
3185 } // emit_GLSL_ELSE
3186 
3187 static void emit_GLSL_ENDIF(Context *ctx)
3188 {
3189     ctx->indent--;
3190     output_line(ctx, "}");
3191 } // emit_GLSL_ENDIF
3192 
3193 static void emit_GLSL_BREAK(Context *ctx)
3194 {
3195     output_line(ctx, "break;");
3196 } // emit_GLSL_BREAK
3197 
3198 static void emit_GLSL_BREAKC(Context *ctx)
3199 {
3200     const char *comp = get_GLSL_comparison_string_scalar(ctx);
3201     char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3202     char src1[64]; make_GLSL_srcarg_string_scalar(ctx, 1, src1, sizeof (src1));
3203     output_line(ctx, "if (%s %s %s) { break; }", src0, comp, src1);
3204 } // emit_GLSL_BREAKC
3205 
3206 static void emit_GLSL_MOVA(Context *ctx)
3207 {
3208     const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
3209     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3210     char code[128];
3211 
3212     if (vecsize == 1)
3213     {
3214         make_GLSL_destarg_assign(ctx, code, sizeof (code),
3215                                  "int(floor(abs(%s) + 0.5) * sign(%s))",
3216                                  src0, src0);
3217     } // if
3218 
3219     else
3220     {
3221         make_GLSL_destarg_assign(ctx, code, sizeof (code),
3222                             "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s))",
3223                             vecsize, src0, vecsize, src0);
3224     } // else
3225 
3226     output_line(ctx, "%s", code);
3227 } // emit_GLSL_MOVA
3228 
3229 static void emit_GLSL_DEFB(Context *ctx)
3230 {
3231     char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
3232     push_output(ctx, &ctx->globals);
3233     output_line(ctx, "const bool %s = %s;",
3234                 varname, ctx->dwords[0] ? "true" : "false");
3235     pop_output(ctx);
3236 } // emit_GLSL_DEFB
3237 
3238 static void emit_GLSL_DEFI(Context *ctx)
3239 {
3240     char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
3241     const int32 *x = (const int32 *) ctx->dwords;
3242     push_output(ctx, &ctx->globals);
3243     output_line(ctx, "const ivec4 %s = ivec4(%d, %d, %d, %d);",
3244                 varname, (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
3245     pop_output(ctx);
3246 } // emit_GLSL_DEFI
3247 
// TEXCRD has no hand-written GLSL emitter in this section; this macro
//  invocation stands in for one. NOTE(review): the macro is defined outside
//  this view, so going by its name it presumably generates a stub that
//  reports the opcode as unimplemented -- confirm against its definition.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
3249 
3250 static void emit_GLSL_TEXKILL(Context *ctx)
3251 {
3252     char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3253     output_line(ctx, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;", dst);
3254 } // emit_GLSL_TEXKILL
3255 
3256 static void glsl_texld(Context *ctx, const int texldd)
3257 {
3258     if (!shader_version_atleast(ctx, 1, 4))
3259     {
3260         DestArgInfo *info = &ctx->dest_arg;
3261         char dst[64];
3262         char sampler[64];
3263         char code[128] = {0};
3264 
3265         assert(!texldd);
3266 
3267         RegisterList *sreg;
3268         sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, info->regnum);
3269         const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
3270 
3271         // !!! FIXME: this code counts on the register not having swizzles, etc.
3272         get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3273         get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3274                                 sampler, sizeof (sampler));
3275 
3276         if (ttype == TEXTURE_TYPE_2D)
3277         {
3278             make_GLSL_destarg_assign(ctx, code, sizeof (code),
3279                                      "texture2D(%s, %s.xy)",
3280                                      sampler, dst);
3281         }
3282         else if (ttype == TEXTURE_TYPE_CUBE)
3283         {
3284             make_GLSL_destarg_assign(ctx, code, sizeof (code),
3285                                      "textureCube(%s, %s.xyz)",
3286                                      sampler, dst);
3287         }
3288         else if (ttype == TEXTURE_TYPE_VOLUME)
3289         {
3290             make_GLSL_destarg_assign(ctx, code, sizeof (code),
3291                                      "texture3D(%s, %s.xyz)",
3292                                      sampler, dst);
3293         }
3294         else
3295         {
3296             fail(ctx, "unexpected texture type");
3297         } // else
3298         output_line(ctx, "%s", code);
3299     } // if
3300 
3301     else if (!shader_version_atleast(ctx, 2, 0))
3302     {
3303         // ps_1_4 is different, too!
3304         fail(ctx, "TEXLD == Shader Model 1.4 unimplemented.");  // !!! FIXME
3305         return;
3306     } // else if
3307 
3308     else
3309     {
3310         const SourceArgInfo *samp_arg = &ctx->source_args[1];
3311         RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
3312                                           samp_arg->regnum);
3313         const char *funcname = NULL;
3314         char src0[64] = { '\0' };
3315         char src1[64]; get_GLSL_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD?
3316         char src2[64] = { '\0' };
3317         char src3[64] = { '\0' };
3318 
3319         if (sreg == NULL)
3320         {
3321             fail(ctx, "TEXLD using undeclared sampler");
3322             return;
3323         } // if
3324 
3325         if (texldd)
3326         {
3327             make_GLSL_srcarg_string_vec2(ctx, 2, src2, sizeof (src2));
3328             make_GLSL_srcarg_string_vec2(ctx, 3, src3, sizeof (src3));
3329         } // if
3330 
3331         // !!! FIXME: can TEXLDD set instruction_controls?
3332         // !!! FIXME: does the d3d bias value map directly to GLSL?
3333         const char *biassep = "";
3334         char bias[64] = { '\0' };
3335         if (ctx->instruction_controls == CONTROL_TEXLDB)
3336         {
3337             biassep = ", ";
3338             make_GLSL_srcarg_string_w(ctx, 0, bias, sizeof (bias));
3339         } // if
3340 
3341         switch ((const TextureType) sreg->index)
3342         {
3343             case TEXTURE_TYPE_2D:
3344                 if (ctx->instruction_controls == CONTROL_TEXLDP)
3345                 {
3346                     funcname = "texture2DProj";
3347                     make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
3348                 } // if
3349                 else  // texld/texldb
3350                 {
3351                     funcname = "texture2D";
3352                     make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
3353                 } // else
3354                 break;
3355             case TEXTURE_TYPE_CUBE:
3356                 if (ctx->instruction_controls == CONTROL_TEXLDP)
3357                     fail(ctx, "TEXLDP on a cubemap");  // !!! FIXME: is this legal?
3358                 funcname = "textureCube";
3359                 make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
3360                 break;
3361             case TEXTURE_TYPE_VOLUME:
3362                 if (ctx->instruction_controls == CONTROL_TEXLDP)
3363                 {
3364                     funcname = "texture3DProj";
3365                     make_GLSL_srcarg_string_full(ctx, 0, src0, sizeof (src0));
3366                 } // if
3367                 else  // texld/texldb
3368                 {
3369                     funcname = "texture3D";
3370                     make_GLSL_srcarg_string_vec3(ctx, 0, src0, sizeof (src0));
3371                 } // else
3372                 break;
3373             default:
3374                 fail(ctx, "unknown texture type");
3375                 return;
3376         } // switch
3377 
3378         assert(!isscalar(ctx, ctx->shader_type, samp_arg->regtype, samp_arg->regnum));
3379         char swiz_str[6] = { '\0' };
3380         make_GLSL_swizzle_string(swiz_str, sizeof (swiz_str),
3381                                  samp_arg->swizzle, ctx->dest_arg.writemask);
3382 
3383         char code[128];
3384         if (texldd)
3385         {
3386             make_GLSL_destarg_assign(ctx, code, sizeof (code),
3387                                      "%sGrad(%s, %s, %s, %s)%s", funcname,
3388                                      src1, src0, src2, src3, swiz_str);
3389         } // if
3390         else
3391         {
3392             make_GLSL_destarg_assign(ctx, code, sizeof (code),
3393                                      "%s(%s, %s%s%s)%s", funcname,
3394                                      src1, src0, biassep, bias, swiz_str);
3395         } // else
3396 
3397         output_line(ctx, "%s", code);
3398     } // else
3399 } // glsl_texld
3400 
3401 static void emit_GLSL_TEXLD(Context *ctx)
3402 {
3403      glsl_texld(ctx, 0);
3404 } // emit_GLSL_TEXLD
3405 
3406 
3407 static void emit_GLSL_TEXBEM(Context *ctx)
3408 {
3409     DestArgInfo *info = &ctx->dest_arg;
3410     char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3411     char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
3412     char sampler[64];
3413     char code[512];
3414 
3415     // !!! FIXME: this code counts on the register not having swizzles, etc.
3416     get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3417                             sampler, sizeof (sampler));
3418 
3419     make_GLSL_destarg_assign(ctx, code, sizeof (code),
3420         "texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
3421         " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))",
3422         sampler,
3423         dst, sampler, src, sampler, src,
3424         dst, sampler, src, sampler, src);
3425 
3426     output_line(ctx, "%s", code);
3427 } // emit_GLSL_TEXBEM
3428 
3429 
3430 static void emit_GLSL_TEXBEML(Context *ctx)
3431 {
3432     // !!! FIXME: this code counts on the register not having swizzles, etc.
3433     DestArgInfo *info = &ctx->dest_arg;
3434     char dst[64]; get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3435     char src[64]; get_GLSL_srcarg_varname(ctx, 0, src, sizeof (src));
3436     char sampler[64];
3437     char code[512];
3438 
3439     get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3440                             sampler, sizeof (sampler));
3441 
3442     make_GLSL_destarg_assign(ctx, code, sizeof (code),
3443         "(texture2D(%s, vec2(%s.x + (%s_texbem.x * %s.x) + (%s_texbem.z * %s.y),"
3444         " %s.y + (%s_texbem.y * %s.x) + (%s_texbem.w * %s.y)))) *"
3445         " ((%s.z * %s_texbeml.x) + %s_texbem.y)",
3446         sampler,
3447         dst, sampler, src, sampler, src,
3448         dst, sampler, src, sampler, src,
3449         src, sampler, sampler);
3450 
3451     output_line(ctx, "%s", code);
3452 } // emit_GLSL_TEXBEML
3453 
// No real GLSL emitters for these yet. The macro is defined elsewhere;
//  presumably it generates stub emitters that report the opcode as
//  unimplemented (confirm against the macro definition).
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) // !!! FIXME
3456 
3457 
3458 static void emit_GLSL_TEXM3X2PAD(Context *ctx)
3459 {
3460     // no-op ... work happens in emit_GLSL_TEXM3X2TEX().
3461 } // emit_GLSL_TEXM3X2PAD
3462 
3463 static void emit_GLSL_TEXM3X2TEX(Context *ctx)
3464 {
3465     if (ctx->texm3x2pad_src0 == -1)
3466         return;
3467 
3468     DestArgInfo *info = &ctx->dest_arg;
3469     char dst[64];
3470     char src0[64];
3471     char src1[64];
3472     char src2[64];
3473     char sampler[64];
3474     char code[512];
3475 
3476     // !!! FIXME: this code counts on the register not having swizzles, etc.
3477     get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3478                             sampler, sizeof (sampler));
3479     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
3480                             src0, sizeof (src0));
3481     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
3482                             src1, sizeof (src1));
3483     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3484                             src2, sizeof (src2));
3485     get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3486 
3487     make_GLSL_destarg_assign(ctx, code, sizeof (code),
3488         "texture2D(%s, vec2(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz)))",
3489         sampler, src0, src1, src2, dst);
3490 
3491     output_line(ctx, "%s", code);
3492 } // emit_GLSL_TEXM3X2TEX
3493 
3494 static void emit_GLSL_TEXM3X3PAD(Context *ctx)
3495 {
3496     // no-op ... work happens in emit_GLSL_TEXM3X3*().
3497 } // emit_GLSL_TEXM3X3PAD
3498 
3499 static void emit_GLSL_TEXM3X3TEX(Context *ctx)
3500 {
3501     if (ctx->texm3x3pad_src1 == -1)
3502         return;
3503 
3504     DestArgInfo *info = &ctx->dest_arg;
3505     char dst[64];
3506     char src0[64];
3507     char src1[64];
3508     char src2[64];
3509     char src3[64];
3510     char src4[64];
3511     char sampler[64];
3512     char code[512];
3513 
3514     // !!! FIXME: this code counts on the register not having swizzles, etc.
3515     get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3516                             sampler, sizeof (sampler));
3517 
3518     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
3519                             src0, sizeof (src0));
3520     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
3521                             src1, sizeof (src1));
3522     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
3523                             src2, sizeof (src2));
3524     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
3525                             src3, sizeof (src3));
3526     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3527                             src4, sizeof (src4));
3528     get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3529 
3530     RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
3531                                       info->regnum);
3532     const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
3533     const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
3534 
3535     make_GLSL_destarg_assign(ctx, code, sizeof (code),
3536         "texture%s(%s,"
3537             " vec3(dot(%s.xyz, %s.xyz),"
3538             " dot(%s.xyz, %s.xyz),"
3539             " dot(%s.xyz, %s.xyz)))",
3540         ttypestr, sampler, src0, src1, src2, src3, dst, src4);
3541 
3542     output_line(ctx, "%s", code);
3543 } // emit_GLSL_TEXM3X3TEX
3544 
3545 static void emit_GLSL_TEXM3X3SPEC_helper(Context *ctx)
3546 {
3547     if (ctx->glsl_generated_texm3x3spec_helper)
3548         return;
3549 
3550     ctx->glsl_generated_texm3x3spec_helper = 1;
3551 
3552     push_output(ctx, &ctx->helpers);
3553     output_line(ctx, "vec3 TEXM3X3SPEC_reflection(const vec3 normal, const vec3 eyeray)");
3554     output_line(ctx, "{"); ctx->indent++;
3555     output_line(ctx,   "return (2.0 * ((normal * eyeray) / (normal * normal)) * normal) - eyeray;"); ctx->indent--;
3556     output_line(ctx, "}");
3557     output_blank_line(ctx);
3558     pop_output(ctx);
3559 } // emit_GLSL_TEXM3X3SPEC_helper
3560 
3561 static void emit_GLSL_TEXM3X3SPEC(Context *ctx)
3562 {
3563     if (ctx->texm3x3pad_src1 == -1)
3564         return;
3565 
3566     DestArgInfo *info = &ctx->dest_arg;
3567     char dst[64];
3568     char src0[64];
3569     char src1[64];
3570     char src2[64];
3571     char src3[64];
3572     char src4[64];
3573     char src5[64];
3574     char sampler[64];
3575     char code[512];
3576 
3577     emit_GLSL_TEXM3X3SPEC_helper(ctx);
3578 
3579     // !!! FIXME: this code counts on the register not having swizzles, etc.
3580     get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3581                             sampler, sizeof (sampler));
3582 
3583     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
3584                             src0, sizeof (src0));
3585     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
3586                             src1, sizeof (src1));
3587     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
3588                             src2, sizeof (src2));
3589     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
3590                             src3, sizeof (src3));
3591     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3592                             src4, sizeof (src4));
3593     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
3594                             src5, sizeof (src5));
3595     get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3596 
3597     RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
3598                                       info->regnum);
3599     const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
3600     const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
3601 
3602     make_GLSL_destarg_assign(ctx, code, sizeof (code),
3603         "texture%s(%s, "
3604             "TEXM3X3SPEC_reflection("
3605                 "vec3("
3606                     "dot(%s.xyz, %s.xyz), "
3607                     "dot(%s.xyz, %s.xyz), "
3608                     "dot(%s.xyz, %s.xyz)"
3609                 "),"
3610                 "%s.xyz,"
3611             ")"
3612         ")",
3613         ttypestr, sampler, src0, src1, src2, src3, dst, src4, src5);
3614 
3615     output_line(ctx, "%s", code);
3616 } // emit_GLSL_TEXM3X3SPEC
3617 
3618 static void emit_GLSL_TEXM3X3VSPEC(Context *ctx)
3619 {
3620     if (ctx->texm3x3pad_src1 == -1)
3621         return;
3622 
3623     DestArgInfo *info = &ctx->dest_arg;
3624     char dst[64];
3625     char src0[64];
3626     char src1[64];
3627     char src2[64];
3628     char src3[64];
3629     char src4[64];
3630     char sampler[64];
3631     char code[512];
3632 
3633     emit_GLSL_TEXM3X3SPEC_helper(ctx);
3634 
3635     // !!! FIXME: this code counts on the register not having swizzles, etc.
3636     get_GLSL_varname_in_buf(ctx, REG_TYPE_SAMPLER, info->regnum,
3637                             sampler, sizeof (sampler));
3638 
3639     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
3640                             src0, sizeof (src0));
3641     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
3642                             src1, sizeof (src1));
3643     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
3644                             src2, sizeof (src2));
3645     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
3646                             src3, sizeof (src3));
3647     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3648                             src4, sizeof (src4));
3649     get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3650 
3651     RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
3652                                       info->regnum);
3653     const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
3654     const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "Cube" : "3D";
3655 
3656     make_GLSL_destarg_assign(ctx, code, sizeof (code),
3657         "texture%s(%s, "
3658             "TEXM3X3SPEC_reflection("
3659                 "vec3("
3660                     "dot(%s.xyz, %s.xyz), "
3661                     "dot(%s.xyz, %s.xyz), "
3662                     "dot(%s.xyz, %s.xyz)"
3663                 "), "
3664                 "vec3(%s.w, %s.w, %s.w)"
3665             ")"
3666         ")",
3667         ttypestr, sampler, src0, src1, src2, src3, dst, src4, src0, src2, dst);
3668 
3669     output_line(ctx, "%s", code);
3670 } // emit_GLSL_TEXM3X3VSPEC
3671 
// Emit EXPP by forwarding to the EXP emitter; no separate reduced-precision
//  code path exists here.
static void emit_GLSL_EXPP(Context *ctx)
{
    // !!! FIXME: msdn's asm docs don't list this opcode, I'll have to check the driver documentation.
    emit_GLSL_EXP(ctx);  // I guess this is just partial precision EXP?
} // emit_GLSL_EXPP
3677 
// Emit LOGP by forwarding to the LOG emitter.
static void emit_GLSL_LOGP(Context *ctx)
{
    // LOGP is just low-precision LOG, but we'll take the higher precision.
    emit_GLSL_LOG(ctx);
} // emit_GLSL_LOGP
3683 
3684 // common code between CMP and CND.
3685 static void emit_GLSL_comparison_operations(Context *ctx, const char *cmp)
3686 {
3687     int i, j;
3688     DestArgInfo *dst = &ctx->dest_arg;
3689     const SourceArgInfo *srcarg0 = &ctx->source_args[0];
3690     const int origmask = dst->writemask;
3691     int used_swiz[4] = { 0, 0, 0, 0 };
3692     const int writemask[4] = { dst->writemask0, dst->writemask1,
3693                                dst->writemask2, dst->writemask3 };
3694     const int src0swiz[4] = { srcarg0->swizzle_x, srcarg0->swizzle_y,
3695                               srcarg0->swizzle_z, srcarg0->swizzle_w };
3696 
3697     for (i = 0; i < 4; i++)
3698     {
3699         int mask = (1 << i);
3700 
3701         if (!writemask[i]) continue;
3702         if (used_swiz[i]) continue;
3703 
3704         // This is a swizzle we haven't checked yet.
3705         used_swiz[i] = 1;
3706 
3707         // see if there are any other elements swizzled to match (.yyyy)
3708         for (j = i + 1; j < 4; j++)
3709         {
3710             if (!writemask[j]) continue;
3711             if (src0swiz[i] != src0swiz[j]) continue;
3712             mask |= (1 << j);
3713             used_swiz[j] = 1;
3714         } // for
3715 
3716         // okay, (mask) should be the writemask of swizzles we like.
3717 
3718         //return make_GLSL_srcarg_string(ctx, idx, (1 << 0));
3719 
3720         char src0[64];
3721         char src1[64];
3722         char src2[64];
3723         make_GLSL_srcarg_string(ctx, 0, (1 << i), src0, sizeof (src0));
3724         make_GLSL_srcarg_string(ctx, 1, mask, src1, sizeof (src1));
3725         make_GLSL_srcarg_string(ctx, 2, mask, src2, sizeof (src2));
3726 
3727         set_dstarg_writemask(dst, mask);
3728 
3729         char code[128];
3730         make_GLSL_destarg_assign(ctx, code, sizeof (code),
3731                                  "((%s %s) ? %s : %s)",
3732                                  src0, cmp, src1, src2);
3733         output_line(ctx, "%s", code);
3734     } // for
3735 
3736     set_dstarg_writemask(dst, origmask);
3737 } // emit_GLSL_comparison_operations
3738 
// Emit CND: choose source arg 1 where source arg 0 is greater than 0.5,
//  otherwise source arg 2 (see emit_GLSL_comparison_operations()).
static void emit_GLSL_CND(Context *ctx)
{
    emit_GLSL_comparison_operations(ctx, "> 0.5");
} // emit_GLSL_CND
3743 
3744 static void emit_GLSL_DEF(Context *ctx)
3745 {
3746     const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
3747     char varname[64]; get_GLSL_destarg_varname(ctx, varname, sizeof (varname));
3748     char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
3749     char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
3750     char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
3751     char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
3752 
3753     push_output(ctx, &ctx->globals);
3754     output_line(ctx, "const vec4 %s = vec4(%s, %s, %s, %s);",
3755                 varname, val0, val1, val2, val3);
3756     pop_output(ctx);
3757 } // emit_GLSL_DEF
3758 
// No real GLSL emitters for these yet. The macro is defined elsewhere;
//  presumably it generates stub emitters that report the opcode as
//  unimplemented (confirm against the macro definition).
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) // !!! FIXME
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) // !!! FIXME
3763 
3764 static void emit_GLSL_TEXM3X3(Context *ctx)
3765 {
3766     if (ctx->texm3x3pad_src1 == -1)
3767         return;
3768 
3769     char dst[64];
3770     char src0[64];
3771     char src1[64];
3772     char src2[64];
3773     char src3[64];
3774     char src4[64];
3775     char code[512];
3776 
3777     // !!! FIXME: this code counts on the register not having swizzles, etc.
3778     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
3779                             src0, sizeof (src0));
3780     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
3781                             src1, sizeof (src1));
3782     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
3783                             src2, sizeof (src2));
3784     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
3785                             src3, sizeof (src3));
3786     get_GLSL_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
3787                             src4, sizeof (src4));
3788     get_GLSL_destarg_varname(ctx, dst, sizeof (dst));
3789 
3790     make_GLSL_destarg_assign(ctx, code, sizeof (code),
3791         "vec4(dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), dot(%s.xyz, %s.xyz), 1.0)",
3792         src0, src1, src2, src3, dst, src4);
3793 
3794     output_line(ctx, "%s", code);
3795 } // emit_GLSL_TEXM3X3
3796 
// No real GLSL emitter for TEXDEPTH yet; the macro (defined elsewhere)
//  presumably generates a stub that reports the opcode as unimplemented.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) // !!! FIXME
3798 
// Emit CMP: choose source arg 1 where source arg 0 is >= 0.0, otherwise
//  source arg 2 (see emit_GLSL_comparison_operations()).
static void emit_GLSL_CMP(Context *ctx)
{
    emit_GLSL_comparison_operations(ctx, ">= 0.0");
} // emit_GLSL_CMP
3803 
// No real GLSL emitter for BEM yet; the macro (defined elsewhere)
//  presumably generates a stub that reports the opcode as unimplemented.
EMIT_GLSL_OPCODE_UNIMPLEMENTED_FUNC(BEM) // !!! FIXME
3805 
3806 static void emit_GLSL_DP2ADD(Context *ctx)
3807 {
3808     char src0[64]; make_GLSL_srcarg_string_vec2(ctx, 0, src0, sizeof (src0));
3809     char src1[64]; make_GLSL_srcarg_string_vec2(ctx, 1, src1, sizeof (src1));
3810     char src2[64]; make_GLSL_srcarg_string_scalar(ctx, 2, src2, sizeof (src2));
3811     char extra[64]; snprintf(extra, sizeof (extra), " + %s", src2);
3812     emit_GLSL_dotprod(ctx, src0, src1, extra);
3813 } // emit_GLSL_DP2ADD
3814 
3815 static void emit_GLSL_DSX(Context *ctx)
3816 {
3817     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3818     char code[128];
3819     make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdx(%s)", src0);
3820     output_line(ctx, "%s", code);
3821 } // emit_GLSL_DSX
3822 
3823 static void emit_GLSL_DSY(Context *ctx)
3824 {
3825     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3826     char code[128];
3827     make_GLSL_destarg_assign(ctx, code, sizeof (code), "dFdy(%s)", src0);
3828     output_line(ctx, "%s", code);
3829 } // emit_GLSL_DSY
3830 
3831 static void emit_GLSL_TEXLDD(Context *ctx)
3832 {
3833     // !!! FIXME:
3834     // GLSL 1.30 introduced textureGrad() for this, but it looks like the
3835     //  functions are overloaded instead of texture2DGrad() (etc).
3836 
3837     // GL_shader_texture_lod and GL_EXT_gpu_shader4 added texture2DGrad*(),
3838     //  so we'll use them if available. Failing that, we'll just fallback
3839     //  to a regular texture2D call and hope the mipmap it chooses is close
3840     //  enough.
3841     if (!ctx->glsl_generated_texldd_setup)
3842     {
3843         ctx->glsl_generated_texldd_setup = 1;
3844         push_output(ctx, &ctx->preflight);
3845         output_line(ctx, "#if GL_ARB_shader_texture_lod");
3846         output_line(ctx, "#extension GL_ARB_shader_texture_lod : enable");
3847         output_line(ctx, "#define texture2DGrad texture2DGradARB");
3848         output_line(ctx, "#define texture2DProjGrad texture2DProjARB");
3849         output_line(ctx, "#elif GL_EXT_gpu_shader4");
3850         output_line(ctx, "#extension GL_EXT_gpu_shader4 : enable");
3851         output_line(ctx, "#else");
3852         output_line(ctx, "#define texture2DGrad(a,b,c,d) texture2D(a,b)");
3853         output_line(ctx, "#define texture2DProjGrad(a,b,c,d) texture2DProj(a,b)");
3854         output_line(ctx, "#endif");
3855         output_blank_line(ctx);
3856         pop_output(ctx);
3857     } // if
3858 
3859     glsl_texld(ctx, 1);
3860 } // emit_GLSL_TEXLDD
3861 
3862 static void emit_GLSL_SETP(Context *ctx)
3863 {
3864     const int vecsize = vecsize_from_writemask(ctx->dest_arg.writemask);
3865     char src0[64]; make_GLSL_srcarg_string_masked(ctx, 0, src0, sizeof (src0));
3866     char src1[64]; make_GLSL_srcarg_string_masked(ctx, 1, src1, sizeof (src1));
3867     char code[128];
3868 
3869     // destination is always predicate register (which is type bvec4).
3870     if (vecsize == 1)
3871     {
3872         const char *comp = get_GLSL_comparison_string_scalar(ctx);
3873         make_GLSL_destarg_assign(ctx, code, sizeof (code),
3874                                  "(%s %s %s)", src0, comp, src1);
3875     } // if
3876     else
3877     {
3878         const char *comp = get_GLSL_comparison_string_vector(ctx);
3879         make_GLSL_destarg_assign(ctx, code, sizeof (code),
3880                                  "%s(%s, %s)", comp, src0, src1);
3881     } // else
3882 
3883     output_line(ctx, "%s", code);
3884 } // emit_GLSL_SETP
3885 
// Emit TEXLDL by forwarding to the plain TEXLD emitter; the explicit
//  level-of-detail request is not honored (see the FIXME below).
static void emit_GLSL_TEXLDL(Context *ctx)
{
    // !!! FIXME: The spec says we can't use GLSL's texture*Lod() built-ins
    // !!! FIXME:  from fragment shaders for some inexplicable reason.
    // !!! FIXME:  For now, you'll just have to suffer with the potentially
    // !!! FIXME:  wrong mipmap until I can figure something out.
    emit_GLSL_TEXLD(ctx);
} // emit_GLSL_TEXLDL
3894 
3895 static void emit_GLSL_BREAKP(Context *ctx)
3896 {
3897     char src0[64]; make_GLSL_srcarg_string_scalar(ctx, 0, src0, sizeof (src0));
3898     output_line(ctx, "if (%s) { break; }", src0);
3899 } // emit_GLSL_BREAKP
3900 
3901 static void emit_GLSL_RESERVED(Context *ctx)
3902 {
3903     // do nothing; fails in the state machine.
3904 } // emit_GLSL_RESERVED
3905 
3906 #endif  // SUPPORT_PROFILE_GLSL
3907 
3908 
3909 
3910 #if !SUPPORT_PROFILE_ARB1
3911 #define PROFILE_EMITTER_ARB1(op)
3912 #else
3913 #undef AT_LEAST_ONE_PROFILE
3914 #define AT_LEAST_ONE_PROFILE 1
3915 #define PROFILE_EMITTER_ARB1(op) emit_ARB1_##op,
3916 
// Get the register-type string for (regtype, regnum) in the arb1 profile.
//  All arguments, including the caller-supplied (regnum_str) buffer of
//  (regnum_size) bytes, are passed straight through to
//  get_D3D_register_string(), and its return value is returned unchanged.
static inline const char *get_ARB1_register_string(Context *ctx,
                        const RegisterType regtype, const int regnum,
                        char *regnum_str, const size_t regnum_size)
{
    // turns out these are identical at the moment.
    return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size);
} // get_ARB1_register_string
3924 
3925 static const char *allocate_ARB1_scratch_reg_name(Context *ctx, char *buf,
3926                                                   const size_t buflen)
3927 {
3928     const int scratch = allocate_scratch_register(ctx);
3929     snprintf(buf, buflen, "scratch%d", scratch);
3930     return buf;
3931 } // allocate_ARB1_scratch_reg_name
3932 
3933 static inline const char *get_ARB1_branch_label_name(Context *ctx, const int id,
3934                                                 char *buf, const size_t buflen)
3935 {
3936     snprintf(buf, buflen, "branch_label%d", id);
3937     return buf;
3938 } // get_ARB1_branch_label_name
3939 
// Get the arb1 variable name for register (rt, regnum). All arguments,
//  including the caller-supplied (buf) of (buflen) bytes, are passed
//  straight through to get_D3D_varname_in_buf(), and its return value is
//  returned unchanged.
static const char *get_ARB1_varname_in_buf(Context *ctx, const RegisterType rt,
                                           const int regnum, char *buf,
                                           const size_t buflen)
{
    // turns out these are identical at the moment.
    return get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen);
} // get_ARB1_varname_in_buf
3947 
// Get the arb1 variable name for register (rt, regnum) without a caller
//  buffer. Forwards to get_D3D_varname() and returns its result unchanged;
//  who owns the returned string is decided there, not here.
static const char *get_ARB1_varname(Context *ctx, const RegisterType rt,
                                    const int regnum)
{
    // turns out these are identical at the moment.
    return get_D3D_varname(ctx, rt, regnum);
} // get_ARB1_varname
3954 
3955 
3956 static inline const char *get_ARB1_const_array_varname_in_buf(Context *ctx,
3957                                                 const int base, const int size,
3958                                                 char *buf, const size_t buflen)
3959 {
3960     snprintf(buf, buflen, "c_array_%d_%d", base, size);
3961     return buf;
3962 } // get_ARB1_const_array_varname_in_buf
3963 
3964 
3965 static const char *get_ARB1_const_array_varname(Context *ctx, int base, int size)
3966 {
3967     char buf[64];
3968     get_ARB1_const_array_varname_in_buf(ctx, base, size, buf, sizeof (buf));
3969     return StrDup(ctx, buf);
3970 } // get_ARB1_const_array_varname
3971 
3972 
3973 static const char *make_ARB1_srcarg_string_in_buf(Context *ctx,
3974                                                   const SourceArgInfo *arg,
3975                                                   char *buf, size_t buflen)
3976 {
3977     // !!! FIXME: this can hit pathological cases where we look like this...
3978     //
3979     //    dp3 r1.xyz, t0_bx2, t0_bx2
3980     //    mad r1.xyz, t0_bias, 1-r1, t0_bx2
3981     //
3982     // ...which do a lot of duplicate work in arb1...
3983     //
3984     //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
3985     //    MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 };
3986     //    SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 };
3987     //    MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 };
3988     //    DP3 r1.xyz, scratch0, scratch1;
3989     //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
3990     //    SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1;
3991     //    SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 };
3992     //    MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 };
3993     //    MAD r1.xyz, scratch0, scratch1, scratch2;
3994     //
3995     // ...notice that the dp3 calculates the same value into two scratch
3996     //  registers. This case is easier to handle; just see if multiple
3997     //  source args are identical, build it up once, and use the same
3998     //  scratch register for multiple arguments in that opcode.
3999     //  Even better still, only calculate things once across instructions,
4000     //  and be smart about letting it linger in a scratch register until we
4001     //  definitely don't need the calculation anymore. That's harder to
4002     //  write, though.
4003 
4004     char regnum_str[16] = { '\0' };
4005 
4006     // !!! FIXME: use get_ARB1_varname_in_buf() instead?
4007     const char *regtype_str = NULL;
4008     if (!arg->relative)
4009     {
4010         regtype_str = get_ARB1_register_string(ctx, arg->regtype,
4011                                                arg->regnum, regnum_str,
4012                                                sizeof (regnum_str));
4013     } // if
4014 
4015     const char *rel_lbracket = "";
4016     char rel_offset[32] = { '\0' };
4017     const char *rel_rbracket = "";
4018     char rel_swizzle[4] = { '\0' };
4019     const char *rel_regtype_str = "";
4020     if (arg->relative)
4021     {
4022         rel_regtype_str = get_ARB1_varname_in_buf(ctx, arg->relative_regtype,
4023                                                   arg->relative_regnum,
4024                                                   (char *) alloca(64), 64);
4025 
4026         rel_swizzle[0] = '.';
4027         rel_swizzle[1] = swizzle_channels[arg->relative_component];
4028         rel_swizzle[2] = '\0';
4029 
4030         if (!support_nv2(ctx))
4031         {
4032             // The address register in ARB1 only allows the '.x' component, so
4033             //  we need to load the component we need from a temp vector
4034             //  register into .x as needed.
4035             assert(arg->relative_regtype == REG_TYPE_ADDRESS);
4036             assert(arg->relative_regnum == 0);
4037             if (ctx->last_address_reg_component != arg->relative_component)
4038             {
4039                 output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str,
4040                             arg->relative_regnum,
4041                             swizzle_channels[arg->relative_component]);
4042                 ctx->last_address_reg_component = arg->relative_component;
4043             } // if
4044 
4045             rel_swizzle[1] = 'x';
4046         } // if
4047 
4048         if (arg->regtype == REG_TYPE_INPUT)
4049             regtype_str = "vertex.attrib";
4050         else
4051         {
4052             assert(arg->regtype == REG_TYPE_CONST);
4053             const int arrayidx = arg->relative_array->index;
4054             const int arraysize = arg->relative_array->count;
4055             const int offset = arg->regnum - arrayidx;
4056             assert(offset >= 0);
4057             regtype_str = get_ARB1_const_array_varname_in_buf(ctx, arrayidx,
4058                                            arraysize, (char *) alloca(64), 64);
4059             if (offset != 0)
4060                 snprintf(rel_offset, sizeof (rel_offset), " + %d", offset);
4061         } // else
4062 
4063         rel_lbracket = "[";
4064         rel_rbracket = "]";
4065     } // if
4066 
4067     // This is the source register with everything but swizzle and source mods.
4068     snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str,
4069              rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset,
4070              rel_rbracket);
4071 
4072     // Some of the source mods need to generate instructions to a temp
4073     //  register, in which case we'll replace the register name.
4074     const SourceMod mod = arg->src_mod;
4075     const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) ||
4076                           ((mod == SRCMOD_ABS) && support_nv2(ctx)) );
4077 
4078     if (!inplace)
4079     {
4080         const size_t len = 64;
4081         char *stackbuf = (char *) alloca(len);
4082         regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len);
4083         regnum_str[0] = '\0'; // move value to scratch register.
4084         rel_lbracket = "";   // scratch register won't use array.
4085         rel_rbracket = "";
4086         rel_offset[0] = '\0';
4087         rel_swizzle[0] = '\0';
4088         rel_regtype_str = "";
4089     } // if
4090 
4091     const char *premod_str = "";
4092     const char *postmod_str = "";
4093     switch (mod)
4094     {
4095         case SRCMOD_NEGATE:
4096             premod_str = "-";
4097             break;
4098 
4099         case SRCMOD_BIASNEGATE:
4100             premod_str = "-";
4101             // fall through.
4102         case SRCMOD_BIAS:
4103             output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };",
4104                         regtype_str, buf);
4105             break;
4106 
4107         case SRCMOD_SIGNNEGATE:
4108             premod_str = "-";
4109             // fall through.
4110         case SRCMOD_SIGN:
4111             output_line(ctx,
4112                 "MAD %s, %s, { 2.0, 2.0, 2.0, 2.0 }, { -1.0, -1.0, -1.0, -1.0 };",
4113                 regtype_str, buf);
4114             break;
4115 
4116         case SRCMOD_COMPLEMENT:
4117             output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;",
4118                         regtype_str, buf);
4119             break;
4120 
4121         case SRCMOD_X2NEGATE:
4122             premod_str = "-";
4123             // fall through.
4124         case SRCMOD_X2:
4125             output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };",
4126                         regtype_str, buf);
4127             break;
4128 
4129         case SRCMOD_DZ:
4130             fail(ctx, "SRCMOD_DZ currently unsupported in arb1");
4131             postmod_str = "_dz";
4132             break;
4133 
4134         case SRCMOD_DW:
4135             fail(ctx, "SRCMOD_DW currently unsupported in arb1");
4136             postmod_str = "_dw";
4137             break;
4138 
4139         case SRCMOD_ABSNEGATE:
4140             premod_str = "-";
4141             // fall through.
4142         case SRCMOD_ABS:
4143             if (!support_nv2(ctx))  // GL_NV_vertex_program2_option adds this.
4144                 output_line(ctx, "ABS %s, %s;", regtype_str, buf);
4145             else
4146             {
4147                 premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|";
4148                 postmod_str = "|";
4149             } // else
4150             break;
4151 
4152         case SRCMOD_NOT:
4153             fail(ctx, "SRCMOD_NOT currently unsupported in arb1");
4154             premod_str = "!";
4155             break;
4156 
4157         case SRCMOD_NONE:
4158         case SRCMOD_TOTAL:
4159              break;  // stop compiler whining.
4160     } // switch
4161 
4162     char swizzle_str[6];
4163     size_t i = 0;
4164 
4165     if (support_nv4(ctx))  // vFace must be output as "vFace.x" in nv4.
4166     {
4167         if (arg->regtype == REG_TYPE_MISCTYPE)
4168         {
4169             if ( ((const MiscTypeType) arg->regnum) == MISCTYPE_TYPE_FACE )
4170             {
4171                 swizzle_str[i++] = '.';
4172                 swizzle_str[i++] = 'x';
4173             } // if
4174         } // if
4175     } // if
4176 
4177     const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
4178     if (!scalar && !no_swizzle(arg->swizzle))
4179     {
4180         swizzle_str[i++] = '.';
4181 
4182         // .xxxx is the same as .x, but .xx is illegal...scalar or full!
4183         if (replicate_swizzle(arg->swizzle))
4184             swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
4185         else
4186         {
4187             swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
4188             swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
4189             swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
4190             swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
4191         } // else
4192     } // if
4193     swizzle_str[i] = '\0';
4194     assert(i < sizeof (swizzle_str));
4195 
4196     snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str,
4197              regtype_str, regnum_str, rel_lbracket,
4198              rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket,
4199              swizzle_str, postmod_str);
4200     // !!! FIXME: make sure the scratch buffer was large enough.
4201     return buf;
4202 } // make_ARB1_srcarg_string_in_buf
4203 
4204 static const char *get_ARB1_destarg_varname(Context *ctx, char *buf,
4205                                             const size_t buflen)
4206 {
4207     const DestArgInfo *arg = &ctx->dest_arg;
4208     return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen);
4209 } // get_ARB1_destarg_varname
4210 
4211 static const char *get_ARB1_srcarg_varname(Context *ctx, const size_t idx,
4212                                            char *buf, const size_t buflen)
4213 {
4214     if (idx >= STATICARRAYLEN(ctx->source_args))
4215     {
4216         fail(ctx, "Too many source args");
4217         *buf = '\0';
4218         return buf;
4219     } // if
4220 
4221     const SourceArgInfo *arg = &ctx->source_args[idx];
4222     return get_ARB1_varname_in_buf(ctx, arg->regtype, arg->regnum, buf, buflen);
4223 } // get_ARB1_srcarg_varname
4224 
4225 
4226 static const char *make_ARB1_destarg_string(Context *ctx, char *buf,
4227                                             const size_t buflen)
4228 {
4229     const DestArgInfo *arg = &ctx->dest_arg;
4230 
4231     *buf = '\0';
4232 
4233     const char *sat_str = "";
4234     if (arg->result_mod & MOD_SATURATE)
4235     {
4236         // nv4 can use ".SAT" in all program types.
4237         // For less than nv4, the "_SAT" modifier is only available in
4238         //  fragment shaders. Every thing else will fake it later in
4239         //  emit_ARB1_dest_modifiers() ...
4240         if (support_nv4(ctx))
4241             sat_str = ".SAT";
4242         else if (shader_is_pixel(ctx))
4243             sat_str = "_SAT";
4244     } // if
4245 
4246     const char *pp_str = "";
4247     if (arg->result_mod & MOD_PP)
4248     {
4249         // Most ARB1 profiles can't do partial precision (MOD_PP), but that's
4250         //  okay. The spec says lots of Direct3D implementations ignore the
4251         //  flag anyhow.
4252         if (support_nv4(ctx))
4253             pp_str = "H";
4254     } // if
4255 
4256     // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
4257     assert((arg->result_mod & MOD_CENTROID) == 0);
4258 
4259     char regnum_str[16];
4260     const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype,
4261                                                        arg->regnum, regnum_str,
4262                                                        sizeof (regnum_str));
4263     if (regtype_str == NULL)
4264     {
4265         fail(ctx, "Unknown destination register type.");
4266         return buf;
4267     } // if
4268 
4269     char writemask_str[6];
4270     size_t i = 0;
4271     const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
4272     if (!scalar && !writemask_xyzw(arg->writemask))
4273     {
4274         writemask_str[i++] = '.';
4275         if (arg->writemask0) writemask_str[i++] = 'x';
4276         if (arg->writemask1) writemask_str[i++] = 'y';
4277         if (arg->writemask2) writemask_str[i++] = 'z';
4278         if (arg->writemask3) writemask_str[i++] = 'w';
4279     } // if
4280     writemask_str[i] = '\0';
4281     assert(i < sizeof (writemask_str));
4282 
4283     const char *pred_left = "";
4284     const char *pred_right = "";
4285     char pred[32] = { '\0' };
4286     if (ctx->predicated)
4287     {
4288         fail(ctx, "dest register predication currently unsupported in arb1");
4289         return buf;
4290         pred_left = "(";
4291         pred_right = ") ";
4292         make_ARB1_srcarg_string_in_buf(ctx, &ctx->predicate_arg,
4293                                        pred, sizeof (pred));
4294     } // if
4295 
4296     snprintf(buf, buflen, "%s%s %s%s%s", pp_str, sat_str,
4297              regtype_str, regnum_str, writemask_str);
4298     // !!! FIXME: make sure the scratch buffer was large enough.
4299     return buf;
4300 } // make_ARB1_destarg_string
4301 
4302 
4303 static void emit_ARB1_dest_modifiers(Context *ctx)
4304 {
4305     const DestArgInfo *arg = &ctx->dest_arg;
4306 
4307     if (arg->result_shift != 0x0)
4308     {
4309         char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4310         const char *multiplier = NULL;
4311 
4312         switch (arg->result_shift)
4313         {
4314             case 0x1: multiplier = "2.0"; break;
4315             case 0x2: multiplier = "4.0"; break;
4316             case 0x3: multiplier = "8.0"; break;
4317             case 0xD: multiplier = "0.125"; break;
4318             case 0xE: multiplier = "0.25"; break;
4319             case 0xF: multiplier = "0.5"; break;
4320         } // switch
4321 
4322         if (multiplier != NULL)
4323         {
4324             char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
4325             output_line(ctx, "MUL%s, %s, %s;", dst, var, multiplier);
4326         } // if
4327     } // if
4328 
4329     if (arg->result_mod & MOD_SATURATE)
4330     {
4331         // nv4 and/or pixel shaders just used the "SAT" modifier, instead.
4332         if ( (!support_nv4(ctx)) && (!shader_is_pixel(ctx)) )
4333         {
4334             char var[64]; get_ARB1_destarg_varname(ctx, var, sizeof (var));
4335             char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4336             output_line(ctx, "MIN%s, %s, 1.0;", dst, var);
4337             output_line(ctx, "MAX%s, %s, 0.0;", dst, var);
4338         } // if
4339     } // if
4340 } // emit_ARB1_dest_modifiers
4341 
4342 
4343 static const char *make_ARB1_srcarg_string(Context *ctx, const size_t idx,
4344                                            char *buf, const size_t buflen)
4345 {
4346     if (idx >= STATICARRAYLEN(ctx->source_args))
4347     {
4348         fail(ctx, "Too many source args");
4349         *buf = '\0';
4350         return buf;
4351     } // if
4352 
4353     const SourceArgInfo *arg = &ctx->source_args[idx];
4354     return make_ARB1_srcarg_string_in_buf(ctx, arg, buf, buflen);
4355 } // make_ARB1_srcarg_string
4356 
4357 static void emit_ARB1_opcode_ds(Context *ctx, const char *opcode)
4358 {
4359     char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4360     char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
4361     output_line(ctx, "%s%s, %s;", opcode, dst, src0);
4362     emit_ARB1_dest_modifiers(ctx);
4363 } // emit_ARB1_opcode_ds
4364 
4365 static void emit_ARB1_opcode_dss(Context *ctx, const char *opcode)
4366 {
4367     char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4368     char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
4369     char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
4370     output_line(ctx, "%s%s, %s, %s;", opcode, dst, src0, src1);
4371     emit_ARB1_dest_modifiers(ctx);
4372 } // emit_ARB1_opcode_dss
4373 
4374 static void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode)
4375 {
4376     char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4377     char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
4378     char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
4379     char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
4380     output_line(ctx, "%s%s, %s, %s, %s;", opcode, dst, src0, src1, src2);
4381     emit_ARB1_dest_modifiers(ctx);
4382 } // emit_ARB1_opcode_dsss
4383 
4384 
// Generators for trivial per-opcode emitters. Each macro expands to a
//  definition of
//      static void emit_ARB1_<opname>(Context *ctx)
//  that forwards to one emit_ARB1_opcode*() helper, passing the opcode's
//  name as a string literal. The macro name says which helper is used
//  (..._DS_FUNC forwards to emit_ARB1_opcode_ds(), and so on).
#define EMIT_ARB1_OPCODE_FUNC(opname) \
    static void emit_ARB1_##opname(Context *ctx) \
    { \
        emit_ARB1_opcode(ctx, #opname); \
    }

#define EMIT_ARB1_OPCODE_D_FUNC(opname) \
    static void emit_ARB1_##opname(Context *ctx) \
    { \
        emit_ARB1_opcode_d(ctx, #opname); \
    }

#define EMIT_ARB1_OPCODE_S_FUNC(opname) \
    static void emit_ARB1_##opname(Context *ctx) \
    { \
        emit_ARB1_opcode_s(ctx, #opname); \
    }

#define EMIT_ARB1_OPCODE_SS_FUNC(opname) \
    static void emit_ARB1_##opname(Context *ctx) \
    { \
        emit_ARB1_opcode_ss(ctx, #opname); \
    }

#define EMIT_ARB1_OPCODE_DS_FUNC(opname) \
    static void emit_ARB1_##opname(Context *ctx) \
    { \
        emit_ARB1_opcode_ds(ctx, #opname); \
    }

#define EMIT_ARB1_OPCODE_DSS_FUNC(opname) \
    static void emit_ARB1_##opname(Context *ctx) \
    { \
        emit_ARB1_opcode_dss(ctx, #opname); \
    }

#define EMIT_ARB1_OPCODE_DSSS_FUNC(opname) \
    static void emit_ARB1_##opname(Context *ctx) \
    { \
        emit_ARB1_opcode_dsss(ctx, #opname); \
    }

#define EMIT_ARB1_OPCODE_DSSSS_FUNC(opname) \
    static void emit_ARB1_##opname(Context *ctx) \
    { \
        emit_ARB1_opcode_dssss(ctx, #opname); \
    }

// Expands to an emitter that does nothing but report, through failf(), that
//  the opcode is unimplemented in the profile named by ctx->profile->name.
#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(opname) \
    static void emit_ARB1_##opname(Context *ctx) \
    { \
        failf(ctx, #opname " unimplemented in %s profile", ctx->profile->name); \
    }
4421 
4422 
4423 static void emit_ARB1_start(Context *ctx, const char *profilestr)
4424 {
4425     const char *shader_str = NULL;
4426     const char *shader_full_str = NULL;
4427     if (shader_is_vertex(ctx))
4428     {
4429         shader_str = "vp";
4430         shader_full_str = "vertex";
4431     } // if
4432     else if (shader_is_pixel(ctx))
4433     {
4434         shader_str = "fp";
4435         shader_full_str = "fragment";
4436     } // else if
4437     else
4438     {
4439         failf(ctx, "Shader type %u unsupported in this profile.",
4440               (uint) ctx->shader_type);
4441         return;
4442     } // if
4443 
4444     set_output(ctx, &ctx->preflight);
4445 
4446     if (strcmp(profilestr, MOJOSHADER_PROFILE_ARB1) == 0)
4447         output_line(ctx, "!!ARB%s1.0", shader_str);
4448 
4449     #if SUPPORT_PROFILE_ARB1_NV
4450     else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV2) == 0)
4451     {
4452         ctx->profile_supports_nv2 = 1;
4453         output_line(ctx, "!!ARB%s1.0", shader_str);
4454         output_line(ctx, "OPTION NV_%s_program2;", shader_full_str);
4455     } // else if
4456 
4457     else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV3) == 0)
4458     {
4459         // there's no NV_fragment_program3, so just use 2.
4460         const int ver = shader_is_pixel(ctx) ? 2 : 3;
4461         ctx->profile_supports_nv2 = 1;
4462         ctx->profile_supports_nv3 = 1;
4463         output_line(ctx, "!!ARB%s1.0", shader_str);
4464         output_line(ctx, "OPTION NV_%s_program%d;", shader_full_str, ver);
4465     } // else if
4466 
4467     else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV4) == 0)
4468     {
4469         ctx->profile_supports_nv2 = 1;
4470         ctx->profile_supports_nv3 = 1;
4471         ctx->profile_supports_nv4 = 1;
4472         output_line(ctx, "!!NV%s4.0", shader_str);
4473     } // else if
4474     #endif
4475 
4476     else
4477     {
4478         failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
4479     } // else
4480 
4481     set_output(ctx, &ctx->mainline);
4482 } // emit_ARB1_start
4483 
4484 static void emit_ARB1_end(Context *ctx)
4485 {
4486     // ps_1_* writes color to r0 instead oC0. We move it to the right place.
4487     // We don't have to worry about a RET opcode messing this up, since
4488     //  RET isn't available before ps_2_0.
4489     if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
4490     {
4491         set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
4492         output_line(ctx, "MOV oC0, r0;");
4493     } // if
4494 
4495     output_line(ctx, "END");
4496 } // emit_ARB1_end
4497 
4498 static void emit_ARB1_phase(Context *ctx)
4499 {
4500     // no-op in arb1.
4501 } // emit_ARB1_phase
4502 
4503 static inline const char *arb1_float_temp(const Context *ctx)
4504 {
4505     // nv4 lets you specify data type.
4506     return (support_nv4(ctx)) ? "FLOAT TEMP" : "TEMP";
4507 } // arb1_float_temp
4508 
4509 static void emit_ARB1_finalize(Context *ctx)
4510 {
4511     push_output(ctx, &ctx->preflight);
4512 
4513     if (shader_is_vertex(ctx) && !ctx->arb1_wrote_position)
4514         output_line(ctx, "OPTION ARB_position_invariant;");
4515 
4516     if (shader_is_pixel(ctx) && ctx->have_multi_color_outputs)
4517         output_line(ctx, "OPTION ARB_draw_buffers;");
4518 
4519     pop_output(ctx);
4520 
4521     const char *tmpstr = arb1_float_temp(ctx);
4522     int i;
4523     push_output(ctx, &ctx->globals);
4524     for (i = 0; i < ctx->max_scratch_registers; i++)
4525     {
4526         char buf[64];
4527         allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
4528         output_line(ctx, "%s %s;", tmpstr, buf);
4529     } // for
4530 
4531     // nv2 fragment programs (and anything nv4) have a real REP/ENDREP.
4532     if ( (support_nv2(ctx)) && (!shader_is_pixel(ctx)) && (!support_nv4(ctx)) )
4533     {
4534         // set up temps for nv2 REP/ENDREP emulation through branching.
4535         for (i = 0; i < ctx->max_reps; i++)
4536             output_line(ctx, "TEMP rep%d;", i);
4537     } // if
4538 
4539     pop_output(ctx);
4540     assert(ctx->scratch_registers == ctx->max_scratch_registers);
4541 } // emit_ARB1_finalize
4542 
4543 static void emit_ARB1_global(Context *ctx, RegisterType regtype, int regnum)
4544 {
4545     // !!! FIXME: dependency on ARB1 profile.  // !!! FIXME about FIXME: huh?
4546     char varname[64];
4547     get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
4548 
4549     push_output(ctx, &ctx->globals);
4550     switch (regtype)
4551     {
4552         case REG_TYPE_ADDRESS:
4553             if (shader_is_pixel(ctx))  // actually REG_TYPE_TEXTURE.
4554             {
4555                 // We have to map texture registers to temps for ps_1_1, since
4556                 //  they work like temps, initialize with tex coords, and the
4557                 //  ps_1_1 TEX opcode expects to overwrite it.
4558                 if (!shader_version_atleast(ctx, 1, 4))
4559                 {
4560                     output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
4561                     push_output(ctx, &ctx->mainline_intro);
4562                     output_line(ctx, "MOV %s, fragment.texcoord[%d];",
4563                                 varname, regnum);
4564                     pop_output(ctx);
4565                 } // if
4566                 break;
4567             } // if
4568 
4569             // nv4 replaced address registers with generic int registers.
4570             if (support_nv4(ctx))
4571                 output_line(ctx, "INT TEMP %s;", varname);
4572             else
4573             {
4574                 // nv2 has four-component address already, but stock arb1 has
4575                 //  to emulate it in a temporary, and move components to the
4576                 //  scalar ADDRESS register on demand.
4577                 output_line(ctx, "ADDRESS %s;", varname);
4578                 if (!support_nv2(ctx))
4579                     output_line(ctx, "TEMP addr%d;", regnum);
4580             } // else
4581             break;
4582 
4583         //case REG_TYPE_PREDICATE:
4584         //    output_line(ctx, "bvec4 %s;", varname);
4585         //    break;
4586         case REG_TYPE_TEMP:
4587             output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
4588             break;
4589         //case REG_TYPE_LOOP:
4590         //    break; // no-op. We declare these in for loops at the moment.
4591         //case REG_TYPE_LABEL:
4592         //    break; // no-op. If we see it here, it means we optimized it out.
4593         default:
4594             fail(ctx, "BUG: we used a register we don't know how to define.");
4595             break;
4596     } // switch
4597     pop_output(ctx);
4598 } // emit_ARB1_global
4599 
4600 static void emit_ARB1_array(Context *ctx, VariableList *var)
4601 {
4602     // All uniforms are now packed tightly into the program.local array,
4603     //  instead of trying to map them to the d3d registers. So this needs to
4604     //  map to the next piece of the array we haven't used yet. Thankfully,
4605     //  arb1 lets you make a PARAM array that maps to a subset of another
4606     //  array; we don't need to do offsets, since myarray[0] can map to
4607     //  program.local[5] without any extra math from us.
4608     const int base = var->index;
4609     const int size = var->count;
4610     const int arb1base = ctx->uniform_float4_count +
4611                          ctx->uniform_int4_count +
4612                          ctx->uniform_bool_count;
4613     char varname[64];
4614     get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
4615     push_output(ctx, &ctx->globals);
4616     output_line(ctx, "PARAM %s[%d] = { program.local[%d..%d] };", varname,
4617                 size, arb1base, (arb1base + size) - 1);
4618     pop_output(ctx);
4619     var->emit_position = arb1base;
4620 } // emit_ARB1_array
4621 
4622 static void emit_ARB1_const_array(Context *ctx, const ConstantsList *clist,
4623                                   int base, int size)
4624 {
4625     char varname[64];
4626     get_ARB1_const_array_varname_in_buf(ctx, base, size, varname, sizeof (varname));
4627     int i;
4628 
4629     push_output(ctx, &ctx->globals);
4630     output_line(ctx, "PARAM %s[%d] = {", varname, size);
4631     ctx->indent++;
4632 
4633     for (i = 0; i < size; i++)
4634     {
4635         while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
4636             clist = clist->next;
4637         assert(clist->constant.index == (base + i));
4638 
4639         char val0[32];
4640         char val1[32];
4641         char val2[32];
4642         char val3[32];
4643         floatstr(ctx, val0, sizeof (val0), clist->constant.value.f[0], 1);
4644         floatstr(ctx, val1, sizeof (val1), clist->constant.value.f[1], 1);
4645         floatstr(ctx, val2, sizeof (val2), clist->constant.value.f[2], 1);
4646         floatstr(ctx, val3, sizeof (val3), clist->constant.value.f[3], 1);
4647 
4648         output_line(ctx, "{ %s, %s, %s, %s }%s", val0, val1, val2, val3,
4649                     (i < (size-1)) ? "," : "");
4650 
4651         clist = clist->next;
4652     } // for
4653 
4654     ctx->indent--;
4655     output_line(ctx, "};");
4656     pop_output(ctx);
4657 } // emit_ARB1_const_array
4658 
4659 static void emit_ARB1_uniform(Context *ctx, RegisterType regtype, int regnum,
4660                               const VariableList *var)
4661 {
4662     // We pack these down into the program.local array, so if we only use
4663     //  register c439, it'll actually map to program.local[0]. This will
4664     //  prevent overflows when we actually have enough resources to run.
4665 
4666     const char *arrayname = "program.local";
4667     int index = 0;
4668 
4669     char varname[64];
4670     get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
4671 
4672     push_output(ctx, &ctx->globals);
4673 
4674     if (var == NULL)
4675     {
4676         // all types share one array (rather, all types convert to float4).
4677         index = ctx->uniform_float4_count + ctx->uniform_int4_count +
4678                 ctx->uniform_bool_count;
4679     } // if
4680 
4681     else
4682     {
4683         const int arraybase = var->index;
4684         if (var->constant)
4685         {
4686             const int arraysize = var->count;
4687             arrayname = get_ARB1_const_array_varname_in_buf(ctx, arraybase,
4688                                         arraysize, (char *) alloca(64), 64);
4689             index = (regnum - arraybase);
4690         } // if
4691         else
4692         {
4693             assert(var->emit_position != -1);
4694             index = (regnum - arraybase) + var->emit_position;
4695         } // else
4696     } // else
4697 
4698     output_line(ctx, "PARAM %s = %s[%d];", varname, arrayname, index);
4699     pop_output(ctx);
4700 } // emit_ARB1_uniform
4701 
4702 static void emit_ARB1_sampler(Context *ctx,int stage,TextureType ttype,int tb)
4703 {
4704     // this is mostly a no-op...you don't predeclare samplers in arb1.
4705 
4706     if (tb)  // This sampler used a ps_1_1 TEXBEM opcode?
4707     {
4708         const int index = ctx->uniform_float4_count + ctx->uniform_int4_count +
4709                           ctx->uniform_bool_count;
4710         char var[64];
4711         get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, var, sizeof(var));
4712         push_output(ctx, &ctx->globals);
4713         output_line(ctx, "PARAM %s_texbem = program.local[%d];", var, index);
4714         output_line(ctx, "PARAM %s_texbeml = program.local[%d];", var, index+1);
4715         pop_output(ctx);
4716         ctx->uniform_float4_count += 2;
4717     } // if
4718 } // emit_ARB1_sampler
4719 
4720 // !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute().
4721 static void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum,
4722                                 MOJOSHADER_usage usage, int index, int wmask,
4723                                 int flags)
4724 {
4725     // !!! FIXME: this function doesn't deal with write masks at all yet!
4726     const char *usage_str = NULL;
4727     const char *arrayleft = "";
4728     const char *arrayright = "";
4729     char index_str[16] = { '\0' };
4730 
4731     char varname[64];
4732     get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
4733 
4734     //assert((flags & MOD_PP) == 0);  // !!! FIXME: is PP allowed?
4735 
4736     if (index != 0)  // !!! FIXME: a lot of these MUST be zero.
4737         snprintf(index_str, sizeof (index_str), "%u", (uint) index);
4738 
4739     if (shader_is_vertex(ctx))
4740     {
4741         // pre-vs3 output registers.
4742         // these don't ever happen in DCL opcodes, I think. Map to vs_3_*
4743         //  output registers.
4744         if (!shader_version_atleast(ctx, 3, 0))
4745         {
4746             if (regtype == REG_TYPE_RASTOUT)
4747             {
4748                 regtype = REG_TYPE_OUTPUT;
4749                 index = regnum;
4750                 switch ((const RastOutType) regnum)
4751                 {
4752                     case RASTOUT_TYPE_POSITION:
4753                         usage = MOJOSHADER_USAGE_POSITION;
4754                         break;
4755                     case RASTOUT_TYPE_FOG:
4756                         usage = MOJOSHADER_USAGE_FOG;
4757                         break;
4758                     case RASTOUT_TYPE_POINT_SIZE:
4759                         usage = MOJOSHADER_USAGE_POINTSIZE;
4760                         break;
4761                 } // switch
4762             } // if
4763 
4764             else if (regtype == REG_TYPE_ATTROUT)
4765             {
4766                 regtype = REG_TYPE_OUTPUT;
4767                 usage = MOJOSHADER_USAGE_COLOR;
4768                 index = regnum;
4769             } // else if
4770 
4771             else if (regtype == REG_TYPE_TEXCRDOUT)
4772             {
4773                 regtype = REG_TYPE_OUTPUT;
4774                 usage = MOJOSHADER_USAGE_TEXCOORD;
4775                 index = regnum;
4776             } // else if
4777         } // if
4778 
4779         // to avoid limitations of various GL entry points for input
4780         // attributes (glSecondaryColorPointer() can only take 3 component
4781         // items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
4782         // issues), we set up all inputs as generic vertex attributes, so we
4783         // can pass data in just about any form, and ignore the built-in GLSL
4784         // attributes like gl_SecondaryColor. Output needs to use the the
4785         // built-ins, though, but we don't have to worry about the GL entry
4786         // point limitations there.
4787 
4788         if (regtype == REG_TYPE_INPUT)
4789         {
4790             const int attr = ctx->assigned_vertex_attributes++;
4791             push_output(ctx, &ctx->globals);
4792             output_line(ctx, "ATTRIB %s = vertex.attrib[%d];", varname, attr);
4793             pop_output(ctx);
4794         } // if
4795 
4796         else if (regtype == REG_TYPE_OUTPUT)
4797         {
4798             switch (usage)
4799             {
4800                 case MOJOSHADER_USAGE_POSITION:
4801                     ctx->arb1_wrote_position = 1;
4802                     usage_str = "result.position";
4803                     break;
4804                 case MOJOSHADER_USAGE_POINTSIZE:
4805                     usage_str = "result.pointsize";
4806                     break;
4807                 case MOJOSHADER_USAGE_COLOR:
4808                     index_str[0] = '\0';  // no explicit number.
4809                     if (index == 0)
4810                         usage_str = "result.color.primary";
4811                     else if (index == 1)
4812                         usage_str = "result.color.secondary";
4813                     break;
4814                 case MOJOSHADER_USAGE_FOG:
4815                     usage_str = "result.fogcoord";
4816                     break;
4817                 case MOJOSHADER_USAGE_TEXCOORD:
4818                     snprintf(index_str, sizeof (index_str), "%u", (uint) index);
4819                     usage_str = "result.texcoord";
4820                     arrayleft = "[";
4821                     arrayright = "]";
4822                     break;
4823                 default:
4824                     // !!! FIXME: we need to deal with some more built-in varyings here.
4825                     break;
4826             } // switch
4827 
4828             // !!! FIXME: the #define is a little hacky, but it means we don't
4829             // !!! FIXME:  have to track these separately if this works.
4830             push_output(ctx, &ctx->globals);
4831             // no mapping to built-in var? Just make it a regular global, pray.
4832             if (usage_str == NULL)
4833                 output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
4834             else
4835             {
4836                 output_line(ctx, "OUTPUT %s = %s%s%s%s;", varname, usage_str,
4837                             arrayleft, index_str, arrayright);
4838             } // else
4839             pop_output(ctx);
4840         } // else if
4841 
4842         else
4843         {
4844             fail(ctx, "unknown vertex shader attribute register");
4845         } // else
4846     } // if
4847 
4848     else if (shader_is_pixel(ctx))
4849     {
4850         const char *paramtype_str = "ATTRIB";
4851 
4852         // samplers DCLs get handled in emit_ARB1_sampler().
4853 
4854         if (flags & MOD_CENTROID)
4855         {
4856             if (!support_nv4(ctx))  // GL_NV_fragment_program4 adds centroid.
4857             {
4858                 // !!! FIXME: should we just wing it without centroid here?
4859                 failf(ctx, "centroid unsupported in %s profile",
4860                       ctx->profile->name);
4861                 return;
4862             } // if
4863 
4864             paramtype_str = "CENTROID ATTRIB";
4865         } // if
4866 
4867         if (regtype == REG_TYPE_COLOROUT)
4868         {
4869             paramtype_str = "OUTPUT";
4870             usage_str = "result.color";
4871             if (ctx->have_multi_color_outputs)
4872             {
4873                 // We have to gamble that you have GL_ARB_draw_buffers.
4874                 // You probably do at this point if you have a sane setup.
4875                 snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
4876                 arrayleft = "[";
4877                 arrayright = "]";
4878             } // if
4879         } // if
4880 
4881         else if (regtype == REG_TYPE_DEPTHOUT)
4882         {
4883             paramtype_str = "OUTPUT";
4884             usage_str = "result.depth";
4885         } // else if
4886 
        // !!! FIXME: can you actually have a texture register with COLOR usage?
4888         else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
4889         {
4890             if (usage == MOJOSHADER_USAGE_TEXCOORD)
4891             {
4892                 // ps_1_1 does a different hack for this attribute.
4893                 //  Refer to emit_ARB1_global()'s REG_TYPE_TEXTURE code.
4894                 if (shader_version_atleast(ctx, 1, 4))
4895                 {
4896                     snprintf(index_str, sizeof (index_str), "%u", (uint) index);
4897                     usage_str = "fragment.texcoord";
4898                     arrayleft = "[";
4899                     arrayright = "]";
4900                 } // if
4901             } // if
4902 
4903             else if (usage == MOJOSHADER_USAGE_COLOR)
4904             {
4905                 index_str[0] = '\0';  // no explicit number.
4906                 if (index == 0)
4907                     usage_str = "fragment.color.primary";
4908                 else if (index == 1)
4909                     usage_str = "fragment.color.secondary";
4910                 else
4911                     fail(ctx, "unsupported color index");
4912             } // else if
4913         } // else if
4914 
4915         else if (regtype == REG_TYPE_MISCTYPE)
4916         {
4917             const MiscTypeType mt = (MiscTypeType) regnum;
4918             if (mt == MISCTYPE_TYPE_FACE)
4919             {
4920                 if (support_nv4(ctx))  // FINALLY, a vFace equivalent in nv4!
4921                 {
4922                     index_str[0] = '\0';  // no explicit number.
4923                     usage_str = "fragment.facing";
4924                 } // if
4925                 else
4926                 {
4927                     failf(ctx, "vFace unsupported in %s profile",
4928                           ctx->profile->name);
4929                 } // else
4930             } // if
4931             else if (mt == MISCTYPE_TYPE_POSITION)
4932             {
4933                 index_str[0] = '\0';  // no explicit number.
4934                 usage_str = "fragment.position";  // !!! FIXME: is this the same coord space as D3D?
4935             } // else if
4936             else
4937             {
4938                 fail(ctx, "BUG: unhandled misc register");
4939             } // else
4940         } // else if
4941 
4942         else
4943         {
4944             fail(ctx, "unknown pixel shader attribute register");
4945         } // else
4946 
4947         if (usage_str != NULL)
4948         {
4949             push_output(ctx, &ctx->globals);
4950             output_line(ctx, "%s %s = %s%s%s%s;", paramtype_str, varname,
4951                         usage_str, arrayleft, index_str, arrayright);
4952             pop_output(ctx);
4953         } // if
4954     } // else if
4955 
4956     else
4957     {
4958         fail(ctx, "Unknown shader type");  // state machine should catch this.
4959     } // else
4960 } // emit_ARB1_attribute
4961 
4962 static void emit_ARB1_RESERVED(Context *ctx) { /* no-op. */ }
4963 
4964 static void emit_ARB1_NOP(Context *ctx)
4965 {
4966     // There is no NOP in arb1. Just don't output anything here.
4967 } // emit_ARB1_NOP
4968 
// Opcodes whose emitters are generated by macro. The macros are defined
//  outside this part of the file; presumably each one expands to an
//  emit_ARB1_<OP>() that forwards the opcode name to the matching
//  emit_ARB1_opcode_ds/dss/dsss() helper (D = dest, S = one source each)
//  -- TODO confirm against the macro definitions.
EMIT_ARB1_OPCODE_DS_FUNC(MOV)
EMIT_ARB1_OPCODE_DSS_FUNC(ADD)
EMIT_ARB1_OPCODE_DSS_FUNC(SUB)
EMIT_ARB1_OPCODE_DSSS_FUNC(MAD)
EMIT_ARB1_OPCODE_DSS_FUNC(MUL)
EMIT_ARB1_OPCODE_DS_FUNC(RCP)
4975 
4976 static void emit_ARB1_RSQ(Context *ctx)
4977 {
4978     // nv4 doesn't force abs() on this, so negative values will generate NaN.
4979     // The spec says you should force the abs() yourself.
4980     if (!support_nv4(ctx))
4981     {
4982         emit_ARB1_opcode_ds(ctx, "RSQ");  // pre-nv4 implies ABS.
4983         return;
4984     } // if
4985 
4986     // we can optimize this to use nv2's |abs| construct in some cases.
4987     if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
4988          (ctx->source_args[0].src_mod == SRCMOD_NEGATE) ||
4989          (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
4990         ctx->source_args[0].src_mod = SRCMOD_ABS;
4991 
4992     char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
4993     char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
4994 
4995     if (ctx->source_args[0].src_mod == SRCMOD_ABS)
4996         output_line(ctx, "RSQ%s, %s;", dst, src0);
4997     else
4998     {
4999         char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5000         output_line(ctx, "ABS %s, %s;", buf, src0);
5001         output_line(ctx, "RSQ%s, %s.x;", dst, buf);
5002     } // else
5003 
5004     emit_ARB1_dest_modifiers(ctx);
5005 } // emit_ARB1_RSQ
5006 
// Macro-generated emitters for two-source opcodes. The macro is defined
//  outside this part of the file; presumably each line expands to an
//  emit_ARB1_<OP>() that hands the opcode name to emit_ARB1_opcode_dss()
//  -- TODO confirm against the macro definition.
EMIT_ARB1_OPCODE_DSS_FUNC(DP3)
EMIT_ARB1_OPCODE_DSS_FUNC(DP4)
EMIT_ARB1_OPCODE_DSS_FUNC(MIN)
EMIT_ARB1_OPCODE_DSS_FUNC(MAX)
EMIT_ARB1_OPCODE_DSS_FUNC(SLT)
EMIT_ARB1_OPCODE_DSS_FUNC(SGE)
5013 
5014 static void emit_ARB1_EXP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
5015 
5016 static void arb1_log(Context *ctx, const char *opcode)
5017 {
5018     // !!! FIXME: SRCMOD_NEGATE can be made into SRCMOD_ABS here, too
5019     // we can optimize this to use nv2's |abs| construct in some cases.
5020     if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
5021          (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
5022         ctx->source_args[0].src_mod = SRCMOD_ABS;
5023 
5024     char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5025     char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5026 
5027     if (ctx->source_args[0].src_mod == SRCMOD_ABS)
5028         output_line(ctx, "%s%s, %s;", opcode, dst, src0);
5029     else
5030     {
5031         char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5032         output_line(ctx, "ABS %s, %s;", buf, src0);
5033         output_line(ctx, "%s%s, %s.x;", opcode, dst, buf);
5034     } // else
5035 
5036     emit_ARB1_dest_modifiers(ctx);
5037 } // arb1_log
5038 
5039 
5040 static void emit_ARB1_LOG(Context *ctx)
5041 {
5042     arb1_log(ctx, "LG2");
5043 } // emit_ARB1_LOG
5044 
5045 
// Macro-generated emitters (macros defined outside this part of the file);
//  presumably these expand to emit_ARB1_LIT() and emit_ARB1_DST(), passing
//  the opcode name straight through -- TODO confirm.
EMIT_ARB1_OPCODE_DS_FUNC(LIT)
EMIT_ARB1_OPCODE_DSS_FUNC(DST)
5048 
5049 static void emit_ARB1_LRP(Context *ctx)
5050 {
5051     if (shader_is_pixel(ctx))  // fragment shaders have a matching LRP opcode.
5052         emit_ARB1_opcode_dsss(ctx, "LRP");
5053     else
5054     {
5055         char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5056         char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5057         char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5058         char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
5059         char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5060 
5061         // LRP is: dest = src2 + src0 * (src1 - src2)
5062         output_line(ctx, "SUB %s, %s, %s;", buf, src1, src2);
5063         output_line(ctx, "MAD%s, %s, %s, %s;", dst, buf, src0, src2);
5064         emit_ARB1_dest_modifiers(ctx);
5065     } // else
5066 } // emit_ARB1_LRP
5067 
// Macro-generated emitter (macro defined outside this part of the file);
//  presumably expands to emit_ARB1_FRC(), passing "FRC" straight through
//  -- TODO confirm.
EMIT_ARB1_OPCODE_DS_FUNC(FRC)
5069 
5070 static void arb1_MxXy(Context *ctx, const int x, const int y)
5071 {
5072     DestArgInfo *dstarg = &ctx->dest_arg;
5073     const int origmask = dstarg->writemask;
5074     char src0[64];
5075     int i;
5076 
5077     make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5078 
5079     for (i = 0; i < y; i++)
5080     {
5081         char dst[64];
5082         char row[64];
5083         make_ARB1_srcarg_string(ctx, i + 1, row, sizeof (row));
5084         set_dstarg_writemask(dstarg, 1 << i);
5085         make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5086         output_line(ctx, "DP%d%s, %s, %s;", x, dst, src0, row);
5087     } // for
5088 
5089     set_dstarg_writemask(dstarg, origmask);
5090     emit_ARB1_dest_modifiers(ctx);
5091 } // arb1_MxXy
5092 
5093 static void emit_ARB1_M4X4(Context *ctx) { arb1_MxXy(ctx, 4, 4); }
5094 static void emit_ARB1_M4X3(Context *ctx) { arb1_MxXy(ctx, 4, 3); }
5095 static void emit_ARB1_M3X4(Context *ctx) { arb1_MxXy(ctx, 3, 4); }
5096 static void emit_ARB1_M3X3(Context *ctx) { arb1_MxXy(ctx, 3, 3); }
5097 static void emit_ARB1_M3X2(Context *ctx) { arb1_MxXy(ctx, 3, 2); }
5098 
5099 static void emit_ARB1_CALL(Context *ctx)
5100 {
5101     if (!support_nv2(ctx))  // no branching in stock ARB1.
5102     {
5103         failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5104         return;
5105     } // if
5106 
5107     char labelstr[64];
5108     get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
5109     output_line(ctx, "CAL %s;", labelstr);
5110 } // emit_ARB1_CALL
5111 
5112 static void emit_ARB1_CALLNZ(Context *ctx)
5113 {
5114     // !!! FIXME: if src1 is a constbool that's true, we can remove the
5115     // !!! FIXME:  if. If it's false, we can make this a no-op.
5116 
5117     if (!support_nv2(ctx))  // no branching in stock ARB1.
5118         failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5119     else
5120     {
5121         // !!! FIXME: double-check this.
5122         char labelstr[64];
5123         char scratch[64];
5124         char src1[64];
5125         get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
5126         get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
5127         allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
5128         output_line(ctx, "MOVC %s, %s;", scratch, src1);
5129         output_line(ctx, "CAL %s (NE.x);", labelstr);
5130     } // else
5131 } // emit_ARB1_CALLNZ
5132 
// LOOP has no arb1 emitter yet. The macro is defined outside this part of
//  the file; presumably it generates an emit_ARB1_LOOP() that reports the
//  opcode as unimplemented -- TODO confirm.
// !!! FIXME: needs BRA in nv2, LOOP in nv2 fragment progs, and REP in nv4.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP)
5135 
5136 static void emit_ARB1_RET(Context *ctx)
5137 {
5138     // don't fail() if no nv2...maybe we're just ending the mainline?
5139     //  if we're ending a LABEL that had no CALL, this would all be written
5140     //  to ctx->ignore anyhow, so this should be "safe" ... arb1 profile will
5141     //  just end up throwing all this code out.
5142     if (support_nv2(ctx))  // no branching in stock ARB1.
5143         output_line(ctx, "RET;");
5144     set_output(ctx, &ctx->mainline); // in case we were ignoring this function.
5145 } // emit_ARB1_RET
5146 
5147 
// ENDLOOP has no arb1 emitter yet. The macro is defined outside this part
//  of the file; presumably it generates an emit_ARB1_ENDLOOP() that reports
//  the opcode as unimplemented -- TODO confirm.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP)
5149 
5150 static void emit_ARB1_LABEL(Context *ctx)
5151 {
5152     if (!support_nv2(ctx))  // no branching in stock ARB1.
5153         return;  // don't fail()...maybe we never use it, but do fail in CALL.
5154 
5155     const int label = ctx->source_args[0].regnum;
5156     RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, label);
5157 
5158     // MSDN specs say CALL* has to come before the LABEL, so we know if we
5159     //  can ditch the entire function here as unused.
5160     if (reg == NULL)
5161         set_output(ctx, &ctx->ignore);  // Func not used. Parse, but don't output.
5162 
5163     // !!! FIXME: it would be nice if we could determine if a function is
5164     // !!! FIXME:  only called once and, if so, forcibly inline it.
5165 
5166     //const char *uses_loopreg = ((reg) && (reg->misc == 1)) ? "int aL" : "";
5167     char labelstr[64];
5168     get_ARB1_srcarg_varname(ctx, 0, labelstr, sizeof (labelstr));
5169     output_line(ctx, "%s:", labelstr);
5170 } // emit_ARB1_LABEL
5171 
5172 
5173 static void emit_ARB1_POW(Context *ctx)
5174 {
5175     // we can optimize this to use nv2's |abs| construct in some cases.
5176     if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
5177          (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
5178         ctx->source_args[0].src_mod = SRCMOD_ABS;
5179 
5180     char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5181     char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5182     char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5183 
5184     if (ctx->source_args[0].src_mod == SRCMOD_ABS)
5185         output_line(ctx, "POW%s, %s, %s;", dst, src0, src1);
5186     else
5187     {
5188         char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5189         output_line(ctx, "ABS %s, %s;", buf, src0);
5190         output_line(ctx, "POW%s, %s.x, %s;", dst, buf, src1);
5191     } // else
5192 
5193     emit_ARB1_dest_modifiers(ctx);
5194 } // emit_ARB1_POW
5195 
5196 static void emit_ARB1_CRS(Context *ctx) { emit_ARB1_opcode_dss(ctx, "XPD"); }
5197 
5198 static void emit_ARB1_SGN(Context *ctx)
5199 {
5200     if (support_nv2(ctx))
5201         emit_ARB1_opcode_ds(ctx, "SSG");
5202     else
5203     {
5204         char dst[64];
5205         char src0[64];
5206         char scratch1[64];
5207         char scratch2[64];
5208         make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5209         make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5210         allocate_ARB1_scratch_reg_name(ctx, scratch1, sizeof (scratch1));
5211         allocate_ARB1_scratch_reg_name(ctx, scratch2, sizeof (scratch2));
5212         output_line(ctx, "SLT %s, %s, 0.0;", scratch1, src0);
5213         output_line(ctx, "SLT %s, -%s, 0.0;", scratch2, src0);
5214         output_line(ctx, "ADD%s -%s, %s;", dst, scratch1, scratch2);
5215         emit_ARB1_dest_modifiers(ctx);
5216     } // else
5217 } // emit_ARB1_SGN
5218 
// Macro-generated emitter (macro defined outside this part of the file);
//  presumably expands to emit_ARB1_ABS(), passing "ABS" straight through
//  -- TODO confirm.
EMIT_ARB1_OPCODE_DS_FUNC(ABS)
5220 
5221 static void emit_ARB1_NRM(Context *ctx)
5222 {
5223     // nv2 fragment programs (and anything nv4) have a real NRM.
5224     if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5225         emit_ARB1_opcode_ds(ctx, "NRM");
5226     else
5227     {
5228         char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5229         char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5230         char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5231         output_line(ctx, "DP3 %s.w, %s, %s;", buf, src0, src0);
5232         output_line(ctx, "RSQ %s.w, %s.w;", buf, buf);
5233         output_line(ctx, "MUL%s, %s.w, %s;", dst, buf, src0);
5234         emit_ARB1_dest_modifiers(ctx);
5235     } // else
5236 } // emit_ARB1_NRM
5237 
5238 
5239 static void emit_ARB1_SINCOS(Context *ctx)
5240 {
5241     // we don't care about the temp registers that <= sm2 demands; ignore them.
5242     const int mask = ctx->dest_arg.writemask;
5243 
5244     // arb1 fragment programs and everything nv4 have sin/cos/sincos opcodes.
5245     if ((shader_is_pixel(ctx)) || (support_nv4(ctx)))
5246     {
5247         char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5248         char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5249         if (writemask_x(mask))
5250             output_line(ctx, "COS%s, %s;", dst, src0);
5251         else if (writemask_y(mask))
5252             output_line(ctx, "SIN%s, %s;", dst, src0);
5253         else if (writemask_xy(mask))
5254             output_line(ctx, "SCS%s, %s;", dst, src0);
5255     } // if
5256 
5257     // nv2+ profiles have sin and cos opcodes.
5258     else if (support_nv2(ctx))
5259     {
5260         char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5261         char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5262         if (writemask_x(mask))
5263             output_line(ctx, "COS %s.x, %s;", dst, src0);
5264         else if (writemask_y(mask))
5265             output_line(ctx, "SIN %s.y, %s;", dst, src0);
5266         else if (writemask_xy(mask))
5267         {
5268             output_line(ctx, "SIN %s.x, %s;", dst, src0);
5269             output_line(ctx, "COS %s.y, %s;", dst, src0);
5270         } // else if
5271     } // if
5272 
5273     else  // big nasty.
5274     {
5275         char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5276         char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
5277         const int need_sin = (writemask_x(mask) || writemask_xy(mask));
5278         const int need_cos = (writemask_y(mask) || writemask_xy(mask));
5279         char scratch[64];
5280 
5281         if (need_sin || need_cos)
5282             allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
5283 
5284         // These sin() and cos() approximations originally found here:
5285         //    http://www.devmaster.net/forums/showthread.php?t=5784
5286         //
5287         // const float B = 4.0f / M_PI;
5288         // const float C = -4.0f / (M_PI * M_PI);
5289         // float y = B * x + C * x * fabs(x);
5290         //
5291         // // optional better precision...
5292         // const float P = 0.225f;
5293         // y = P * (y * fabs(y) - y) + y;
5294         //
5295         //
5296         // That first thing can be reduced to:
5297         // const float y = ((1.2732395447351626861510701069801f * x) +
5298         //             ((-0.40528473456935108577551785283891f * x) * fabs(x)));
5299 
5300         if (need_sin)
5301         {
5302             // !!! FIXME: use SRCMOD_ABS here?
5303             output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
5304             output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
5305             output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
5306             output_line(ctx, "MAD %s.x, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
5307         } // if
5308 
5309         // cosine is sin(x + M_PI/2), but you have to wrap x to pi:
5310         //  if (x+(M_PI/2) > M_PI)
5311         //      x -= 2 * M_PI;
5312         //
5313         // which is...
5314         //  if (x+(1.57079637050628662109375) > 3.1415927410125732421875)
5315         //      x += -6.283185482025146484375;
5316 
5317         if (need_cos)
5318         {
5319             output_line(ctx, "ADD %s.x, %s.x, 1.57079637050628662109375;", scratch, src0);
5320             output_line(ctx, "SGE %s.y, %s.x, 3.1415927410125732421875;", scratch, scratch);
5321             output_line(ctx, "MAD %s.x, %s.y, -6.283185482025146484375, %s.x;", scratch, scratch, scratch);
5322             output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
5323             output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
5324             output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
5325             output_line(ctx, "MAD %s.y, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
5326         } // if
5327     } // else
5328 
5329     // !!! FIXME: might not have done anything. Don't emit if we didn't.
5330     if (!isfail(ctx))
5331         emit_ARB1_dest_modifiers(ctx);
5332 } // emit_ARB1_SINCOS
5333 
5334 
5335 static void emit_ARB1_REP(Context *ctx)
5336 {
5337     char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5338 
5339     // nv2 fragment programs (and everything nv4) have a real REP.
5340     if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5341         output_line(ctx, "REP %s;", src0);
5342 
5343     else if (support_nv2(ctx))
5344     {
5345         // no REP, but we can use branches.
5346         char failbranch[32];
5347         char topbranch[32];
5348         const int toplabel = allocate_branch_label(ctx);
5349         const int faillabel = allocate_branch_label(ctx);
5350         get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
5351         get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));
5352 
5353         assert(((size_t) ctx->branch_labels_stack_index) <
5354                 STATICARRAYLEN(ctx->branch_labels_stack)-1);
5355 
5356         ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = toplabel;
5357         ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = faillabel;
5358 
5359         char scratch[32];
5360         snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
5361         output_line(ctx, "MOVC %s.x, %s;", scratch, src0);
5362         output_line(ctx, "BRA %s (LE.x);", failbranch);
5363         output_line(ctx, "%s:", topbranch);
5364     } // else if
5365 
5366     else  // stock ARB1 has no branching.
5367     {
5368         fail(ctx, "branching unsupported in this profile");
5369     } // else
5370 } // emit_ARB1_REP
5371 
5372 
5373 static void emit_ARB1_ENDREP(Context *ctx)
5374 {
5375     // nv2 fragment programs (and everything nv4) have a real ENDREP.
5376     if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5377         output_line(ctx, "ENDREP;");
5378 
5379     else if (support_nv2(ctx))
5380     {
5381         // no ENDREP, but we can use branches.
5382         assert(ctx->branch_labels_stack_index >= 2);
5383 
5384         char failbranch[32];
5385         char topbranch[32];
5386         const int faillabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
5387         const int toplabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
5388         get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
5389         get_ARB1_branch_label_name(ctx,toplabel,topbranch,sizeof(topbranch));
5390 
5391         char scratch[32];
5392         snprintf(scratch, sizeof (scratch), "rep%d", ctx->reps);
5393         output_line(ctx, "SUBC %s.x, %s.x, 1.0;", scratch, scratch);
5394         output_line(ctx, "BRA %s (GT.x);", topbranch);
5395         output_line(ctx, "%s:", failbranch);
5396     } // else if
5397 
5398     else  // stock ARB1 has no branching.
5399     {
5400         fail(ctx, "branching unsupported in this profile");
5401     } // else
5402 } // emit_ARB1_ENDREP
5403 
5404 
5405 static void nv2_if(Context *ctx)
5406 {
5407     // The condition code register MUST be set up before this!
5408     // nv2 fragment programs (and everything nv4) have a real IF.
5409     if ( (support_nv4(ctx)) || (shader_is_pixel(ctx)) )
5410         output_line(ctx, "IF EQ.x;");
5411     else
5412     {
5413         // there's no IF construct, but we can use a branch to a label.
5414         char failbranch[32];
5415         const int label = allocate_branch_label(ctx);
5416         get_ARB1_branch_label_name(ctx, label, failbranch, sizeof (failbranch));
5417 
5418         assert(((size_t) ctx->branch_labels_stack_index)
5419                  < STATICARRAYLEN(ctx->branch_labels_stack));
5420 
5421         ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = label;
5422 
5423         // !!! FIXME: should this be NE? (EQ would jump to the ELSE for the IF condition, right?).
5424         output_line(ctx, "BRA %s (EQ.x);", failbranch);
5425     } // else
5426 } // nv2_if
5427 
5428 
5429 static void emit_ARB1_IF(Context *ctx)
5430 {
5431     if (support_nv2(ctx))
5432     {
5433         char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
5434         char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
5435         output_line(ctx, "MOVC %s.x, %s;", buf, src0);
5436         nv2_if(ctx);
5437     } // if
5438 
5439     else  // stock ARB1 has no branching.
5440     {
5441         failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5442     } // else
5443 } // emit_ARB1_IF
5444 
5445 
5446 static void emit_ARB1_ELSE(Context *ctx)
5447 {
5448     // nv2 fragment programs (and everything nv4) have a real ELSE.
5449     if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5450         output_line(ctx, "ELSE;");
5451 
5452     else if (support_nv2(ctx))
5453     {
5454         // there's no ELSE construct, but we can use a branch to a label.
5455         assert(ctx->branch_labels_stack_index > 0);
5456 
5457         // At the end of the IF block, unconditionally jump to the ENDIF.
5458         const int endlabel = allocate_branch_label(ctx);
5459         char endbranch[32];
5460         get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
5461         output_line(ctx, "BRA %s;", endbranch);
5462 
5463         // Now mark the ELSE section with a lable.
5464         const int elselabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index-1];
5465         char elsebranch[32];
5466         get_ARB1_branch_label_name(ctx,elselabel,elsebranch,sizeof(elsebranch));
5467         output_line(ctx, "%s:", elsebranch);
5468 
5469         // Replace the ELSE label with the ENDIF on the label stack.
5470         ctx->branch_labels_stack[ctx->branch_labels_stack_index-1] = endlabel;
5471     } // else if
5472 
5473     else  // stock ARB1 has no branching.
5474     {
5475         failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5476     } // else
5477 } // emit_ARB1_ELSE
5478 
5479 
5480 static void emit_ARB1_ENDIF(Context *ctx)
5481 {
5482     // nv2 fragment programs (and everything nv4) have a real ENDIF.
5483     if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5484         output_line(ctx, "ENDIF;");
5485 
5486     else if (support_nv2(ctx))
5487     {
5488         // there's no ENDIF construct, but we can use a branch to a label.
5489         assert(ctx->branch_labels_stack_index > 0);
5490         const int endlabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
5491         char endbranch[32];
5492         get_ARB1_branch_label_name(ctx,endlabel,endbranch,sizeof (endbranch));
5493         output_line(ctx, "%s:", endbranch);
5494     } // if
5495 
5496     else  // stock ARB1 has no branching.
5497     {
5498         failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5499     } // else
5500 } // emit_ARB1_ENDIF
5501 
5502 
5503 static void emit_ARB1_BREAK(Context *ctx)
5504 {
5505     // nv2 fragment programs (and everything nv4) have a real BREAK.
5506     if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
5507         output_line(ctx, "BRK;");
5508 
5509     else if (support_nv2(ctx))
5510     {
5511         // no BREAK, but we can use branches.
5512         assert(ctx->branch_labels_stack_index >= 2);
5513         const int faillabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index];
5514         char failbranch[32];
5515         get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
5516         output_line(ctx, "BRA %s;", failbranch);
5517     } // else if
5518 
5519     else  // stock ARB1 has no branching.
5520     {
5521         failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
5522     } // else
5523 } // emit_ARB1_BREAK
5524 
5525 
5526 static void emit_ARB1_MOVA(Context *ctx)
5527 {
5528     // nv2 and nv3 can use the ARR opcode.
5529     // But nv4 removed ARR (and ADDRESS registers!). Just ROUND to an INT.
5530     if (support_nv4(ctx))
5531         emit_ARB1_opcode_ds(ctx, "ROUND.S");  // !!! FIXME: don't use a modifier here.
5532     else if ((support_nv2(ctx)) || (support_nv3(ctx)))
5533         emit_ARB1_opcode_ds(ctx, "ARR");
5534     else
5535     {
5536         char src0[64];
5537         char scratch[64];
5538         char addr[32];
5539 
5540         make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5541         allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
5542         snprintf(addr, sizeof (addr), "addr%d", ctx->dest_arg.regnum);
5543 
5544         // !!! FIXME: we can optimize this if src_mod is ABS or ABSNEGATE.
5545 
5546         // ARL uses floor(), but D3D expects round-to-nearest.
5547         // There is probably a more efficient way to do this.
5548         if (shader_is_pixel(ctx))  // CMP only exists in fragment programs.  :/
5549             output_line(ctx, "CMP %s, %s, -1.0, 1.0;", scratch, src0);
5550         else
5551         {
5552             output_line(ctx, "SLT %s, %s, 0.0;", scratch, src0);
5553             output_line(ctx, "MAD %s, %s, -2.0, 1.0;", scratch, scratch);
5554         } // else
5555 
5556         output_line(ctx, "ABS %s, %s;", addr, src0);
5557         output_line(ctx, "ADD %s, %s, 0.5;", addr, addr);
5558         output_line(ctx, "FLR %s, %s;", addr, addr);
5559         output_line(ctx, "MUL %s, %s, %s;", addr, addr, scratch);
5560 
5561         // we don't handle these right now, since emit_ARB1_dest_modifiers(ctx)
5562         //  wants to look at dest_arg, not our temp register.
5563         assert(ctx->dest_arg.result_mod == 0);
5564         assert(ctx->dest_arg.result_shift == 0);
5565 
5566         // we assign to the actual address register as needed.
5567         ctx->last_address_reg_component = -1;
5568     } // else
5569 } // emit_ARB1_MOVA
5570 
5571 
5572 static void emit_ARB1_TEXKILL(Context *ctx)
5573 {
5574     // d3d kills on xyz, arb1 kills on xyzw. Fix the swizzle.
5575     //  We just map the x component to w. If it's negative, the fragment
5576     //  would discard anyhow, otherwise, it'll pass through okay. This saves
5577     //  us a temp register.
5578     char dst[64];
5579     get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5580     output_line(ctx, "KIL %s.xyzx;", dst);
5581 } // emit_ARB1_TEXKILL
5582 
5583 static void arb1_texbem(Context *ctx, const int luminance)
5584 {
5585     // !!! FIXME: this code counts on the register not having swizzles, etc.
5586     const int stage = ctx->dest_arg.regnum;
5587     char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5588     char src[64]; get_ARB1_srcarg_varname(ctx, 0, src, sizeof (src));
5589     char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
5590     char sampler[64];
5591     get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage,
5592                             sampler, sizeof (sampler));
5593 
5594     output_line(ctx, "MUL %s, %s_texbem.xzyw, %s.xyxy;", tmp, sampler, src);
5595     output_line(ctx, "ADD %s.xy, %s.xzxx, %s.ywxx;", tmp, tmp, tmp);
5596     output_line(ctx, "ADD %s.xy, %s, %s;", tmp, tmp, dst);
5597     output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, tmp, stage);
5598 
5599     if (luminance)  // TEXBEML, not just TEXBEM?
5600     {
5601         output_line(ctx, "MAD %s, %s.zzzz, %s_texbeml.xxxx, %s_texbeml.yyyy;",
5602                     tmp, src, sampler, sampler);
5603         output_line(ctx, "MUL %s, %s, %s;", dst, dst, tmp);
5604     } // if
5605 
5606     emit_ARB1_dest_modifiers(ctx);
5607 } // arb1_texbem
5608 
5609 static void emit_ARB1_TEXBEM(Context *ctx)
5610 {
5611     arb1_texbem(ctx, 0);
5612 } // emit_ARB1_TEXBEM
5613 
5614 static void emit_ARB1_TEXBEML(Context *ctx)
5615 {
5616     arb1_texbem(ctx, 1);
5617 } // emit_ARB1_TEXBEML
5618 
// NOTE(review): the macro is defined outside this section; presumably each
//  line generates an emit_ARB1_<opcode>() stub that reports the opcode as
//  unimplemented -- confirm against the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB)
5621 
5622 
5623 static void emit_ARB1_TEXM3X2PAD(Context *ctx)
5624 {
5625     // no-op ... work happens in emit_ARB1_TEXM3X2TEX().
5626 } // emit_ARB1_TEXM3X2PAD
5627 
5628 static void emit_ARB1_TEXM3X2TEX(Context *ctx)
5629 {
5630     if (ctx->texm3x2pad_src0 == -1)
5631         return;
5632 
5633     char dst[64];
5634     char src0[64];
5635     char src1[64];
5636     char src2[64];
5637 
5638     // !!! FIXME: this code counts on the register not having swizzles, etc.
5639     const int stage = ctx->dest_arg.regnum;
5640     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
5641                             src0, sizeof (src0));
5642     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
5643                             src1, sizeof (src1));
5644     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5645                             src2, sizeof (src2));
5646     get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5647 
5648     output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, dst);
5649     output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5650     output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dst, dst, stage);
5651     emit_ARB1_dest_modifiers(ctx);
5652 } // emit_ARB1_TEXM3X2TEX
5653 
5654 
5655 static void emit_ARB1_TEXM3X3PAD(Context *ctx)
5656 {
5657     // no-op ... work happens in emit_ARB1_TEXM3X3*().
5658 } // emit_ARB1_TEXM3X3PAD
5659 
5660 
5661 static void emit_ARB1_TEXM3X3TEX(Context *ctx)
5662 {
5663     if (ctx->texm3x3pad_src1 == -1)
5664         return;
5665 
5666     char dst[64];
5667     char src0[64];
5668     char src1[64];
5669     char src2[64];
5670     char src3[64];
5671     char src4[64];
5672 
5673     // !!! FIXME: this code counts on the register not having swizzles, etc.
5674     const int stage = ctx->dest_arg.regnum;
5675     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
5676                             src0, sizeof (src0));
5677     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
5678                             src1, sizeof (src1));
5679     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
5680                             src2, sizeof (src2));
5681     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
5682                             src3, sizeof (src3));
5683     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5684                             src4, sizeof (src4));
5685     get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5686 
5687     RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
5688     const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
5689     const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
5690 
5691     output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
5692     output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5693     output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
5694     output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, dst, stage, ttypestr);
5695     emit_ARB1_dest_modifiers(ctx);
5696 } // emit_ARB1_TEXM3X3TEX
5697 
5698 static void emit_ARB1_TEXM3X3SPEC(Context *ctx)
5699 {
5700     if (ctx->texm3x3pad_src1 == -1)
5701         return;
5702 
5703     char dst[64];
5704     char src0[64];
5705     char src1[64];
5706     char src2[64];
5707     char src3[64];
5708     char src4[64];
5709     char src5[64];
5710     char tmp[64];
5711     char tmp2[64];
5712 
5713     // !!! FIXME: this code counts on the register not having swizzles, etc.
5714     const int stage = ctx->dest_arg.regnum;
5715     allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
5716     allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2));
5717     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
5718                             src0, sizeof (src0));
5719     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
5720                             src1, sizeof (src1));
5721     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
5722                             src2, sizeof (src2));
5723     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
5724                             src3, sizeof (src3));
5725     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5726                             src4, sizeof (src4));
5727     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
5728                             src5, sizeof (src5));
5729     get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5730 
5731     RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
5732     const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
5733     const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
5734 
5735     output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
5736     output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5737     output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
5738     output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst);    // normal * normal
5739     output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, src5);  // normal * eyeray
5740 
5741     // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
5742     // !!! FIXME:  divides or reciprocals...right?
5743     output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2);
5744     output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2);
5745     output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2);
5746     output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2);
5747     output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2);
5748 
5749     output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp);
5750     output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, src5);
5751     output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr);
5752     emit_ARB1_dest_modifiers(ctx);
5753 } // emit_ARB1_TEXM3X3SPEC
5754 
5755 static void emit_ARB1_TEXM3X3VSPEC(Context *ctx)
5756 {
5757     if (ctx->texm3x3pad_src1 == -1)
5758         return;
5759 
5760     char dst[64];
5761     char src0[64];
5762     char src1[64];
5763     char src2[64];
5764     char src3[64];
5765     char src4[64];
5766     char tmp[64];
5767     char tmp2[64];
5768     char tmp3[64];
5769 
5770     // !!! FIXME: this code counts on the register not having swizzles, etc.
5771     const int stage = ctx->dest_arg.regnum;
5772     allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
5773     allocate_ARB1_scratch_reg_name(ctx, tmp2, sizeof (tmp2));
5774     allocate_ARB1_scratch_reg_name(ctx, tmp3, sizeof (tmp3));
5775     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
5776                             src0, sizeof (src0));
5777     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
5778                             src1, sizeof (src1));
5779     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
5780                             src2, sizeof (src2));
5781     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
5782                             src3, sizeof (src3));
5783     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5784                             src4, sizeof (src4));
5785     get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5786 
5787     RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
5788     const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);
5789     const char *ttypestr = (ttype == TEXTURE_TYPE_CUBE) ? "CUBE" : "3D";
5790 
5791     output_line(ctx, "MOV %s.x, %s.w;", tmp3, src0);
5792     output_line(ctx, "MOV %s.y, %s.w;", tmp3, src2);
5793     output_line(ctx, "MOV %s.z, %s.w;", tmp3, dst);
5794     output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
5795     output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5796     output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
5797     output_line(ctx, "MUL %s, %s, %s;", tmp, dst, dst);    // normal * normal
5798     output_line(ctx, "MUL %s, %s, %s;", tmp2, dst, tmp3);  // normal * eyeray
5799 
5800     // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
5801     // !!! FIXME:  divides or reciprocals...right?
5802     output_line(ctx, "RCP %s.x, %s.x;", tmp2, tmp2);
5803     output_line(ctx, "RCP %s.y, %s.y;", tmp2, tmp2);
5804     output_line(ctx, "RCP %s.z, %s.z;", tmp2, tmp2);
5805     output_line(ctx, "RCP %s.w, %s.w;", tmp2, tmp2);
5806     output_line(ctx, "MUL %s, %s, %s;", tmp, tmp, tmp2);
5807 
5808     output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", tmp, tmp);
5809     output_line(ctx, "MAD %s, %s, %s, -%s;", tmp, tmp, dst, tmp3);
5810     output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, tmp, stage, ttypestr);
5811     emit_ARB1_dest_modifiers(ctx);
5812 } // emit_ARB1_TEXM3X3VSPEC
5813 
5814 static void emit_ARB1_EXPP(Context *ctx) { emit_ARB1_opcode_ds(ctx, "EX2"); }
5815 static void emit_ARB1_LOGP(Context *ctx) { arb1_log(ctx, "LG2"); }
5816 
5817 static void emit_ARB1_CND(Context *ctx)
5818 {
5819     char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5820     char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5821     char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5822     char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
5823     char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
5824 
5825     // CND compares against 0.5, but we need to compare against 0.0...
5826     //  ...subtract to make up the difference.
5827     output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", tmp, src0);
5828     // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
5829     //  switch src1 and src2 to get the same results.
5830     output_line(ctx, "CMP%s, %s, %s, %s;", dst, tmp, src2, src1);
5831     emit_ARB1_dest_modifiers(ctx);
5832 } // emit_ARB1_CND
5833 
// NOTE(review): the macro is defined outside this section; presumably each
//  line generates an emit_ARB1_<opcode>() stub that reports the opcode as
//  unimplemented -- confirm against the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3)
5838 
5839 static void emit_ARB1_TEXM3X3(Context *ctx)
5840 {
5841     if (ctx->texm3x3pad_src1 == -1)
5842         return;
5843 
5844     char dst[64];
5845     char src0[64];
5846     char src1[64];
5847     char src2[64];
5848     char src3[64];
5849     char src4[64];
5850 
5851     // !!! FIXME: this code counts on the register not having swizzles, etc.
5852     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
5853                             src0, sizeof (src0));
5854     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
5855                             src1, sizeof (src1));
5856     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
5857                             src2, sizeof (src2));
5858     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
5859                             src3, sizeof (src3));
5860     get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
5861                             src4, sizeof (src4));
5862     get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
5863 
5864     output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, src4);
5865     output_line(ctx, "DP3 %s.x, %s, %s;", dst, src0, src1);
5866     output_line(ctx, "DP3 %s.y, %s, %s;", dst, src2, src3);
5867     output_line(ctx, "MOV %s.w, { 1.0, 1.0, 1.0, 1.0 };", dst);
5868     emit_ARB1_dest_modifiers(ctx);
5869 } // emit_ARB1_TEXM3X3
5870 
// NOTE(review): presumably generates an emit_ARB1_TEXDEPTH() stub that
//  reports the opcode as unimplemented -- confirm against the macro.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH)
5872 
5873 static void emit_ARB1_CMP(Context *ctx)
5874 {
5875     char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5876     char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5877     char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5878     char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
5879     // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
5880     //  switch src1 and src2 to get the same results.
5881     output_line(ctx, "CMP%s, %s, %s, %s;", dst, src0, src2, src1);
5882     emit_ARB1_dest_modifiers(ctx);
5883 } // emit_ARB1_CMP
5884 
// NOTE(review): presumably generates an emit_ARB1_BEM() stub that reports
//  the opcode as unimplemented -- confirm against the macro.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BEM)
5886 
5887 
5888 static void emit_ARB1_DP2ADD(Context *ctx)
5889 {
5890     if (support_nv4(ctx))  // nv4 has a built-in equivalent to DP2ADD.
5891         emit_ARB1_opcode_dsss(ctx, "DP2A");
5892     else
5893     {
5894         char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5895         char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
5896         char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
5897         char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
5898         char scratch[64];
5899 
5900         // DP2ADD is:
5901         //  dst = (src0.r * src1.r) + (src0.g * src1.g) + src2.replicate_swiz
5902         allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
5903         output_line(ctx, "MUL %s, %s, %s;", scratch, src0, src1);
5904         output_line(ctx, "ADD %s, %s.x, %s.y;", scratch, scratch, scratch);
5905         output_line(ctx, "ADD%s, %s.x, %s;", dst, scratch, src2);
5906         emit_ARB1_dest_modifiers(ctx);
5907     } // else
5908 } // emit_ARB1_DP2ADD
5909 
5910 
5911 static void emit_ARB1_DSX(Context *ctx)
5912 {
5913     if (support_nv2(ctx))  // nv2 has a built-in equivalent to DSX.
5914         emit_ARB1_opcode_ds(ctx, "DDX");
5915     else
5916         failf(ctx, "DSX unsupported in %s profile", ctx->profile->name);
5917 } // emit_ARB1_DSX
5918 
5919 
5920 static void emit_ARB1_DSY(Context *ctx)
5921 {
5922     if (support_nv2(ctx))  // nv2 has a built-in equivalent to DSY.
5923         emit_ARB1_opcode_ds(ctx, "DDY");
5924     else
5925         failf(ctx, "DSY unsupported in %s profile", ctx->profile->name);
5926 } // emit_ARB1_DSY
5927 
5928 static void arb1_texld(Context *ctx, const char *opcode, const int texldd)
5929 {
5930     // !!! FIXME: Hack: "TEXH" is invalid in nv4. Fix this more cleanly.
5931     if ((ctx->dest_arg.result_mod & MOD_PP) && (support_nv4(ctx)))
5932         ctx->dest_arg.result_mod &= ~MOD_PP;
5933 
5934     char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
5935 
5936     const int sm1 = !shader_version_atleast(ctx, 1, 4);
5937     const int regnum = sm1 ? ctx->dest_arg.regnum : ctx->source_args[1].regnum;
5938     RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, regnum);
5939 
5940     const char *ttype = NULL;
5941     char src0[64];
5942     if (sm1)
5943         get_ARB1_destarg_varname(ctx, src0, sizeof (src0));
5944     else
5945         get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
5946     //char src1[64]; get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));  // !!! FIXME: SRC_MOD?
5947 
5948     char src2[64] = { 0 };
5949     char src3[64] = { 0 };
5950 
5951     if (texldd)
5952     {
5953         make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
5954         make_ARB1_srcarg_string(ctx, 3, src3, sizeof (src3));
5955     } // if
5956 
5957     // !!! FIXME: this should be in state_TEXLD, not in the arb1/glsl emitters.
5958     if (sreg == NULL)
5959     {
5960         fail(ctx, "TEXLD using undeclared sampler");
5961         return;
5962     } // if
5963 
5964     // SM1 only specifies dst, so don't check swizzle there.
5965     if ( !sm1 && (!no_swizzle(ctx->source_args[1].swizzle)) )
5966     {
5967         // !!! FIXME: does this ever actually happen?
5968         fail(ctx, "BUG: can't handle TEXLD with sampler swizzle at the moment");
5969     } // if
5970 
5971     switch ((const TextureType) sreg->index)
5972     {
5973         case TEXTURE_TYPE_2D: ttype = "2D"; break; // !!! FIXME: "RECT"?
5974         case TEXTURE_TYPE_CUBE: ttype = "CUBE"; break;
5975         case TEXTURE_TYPE_VOLUME: ttype = "3D"; break;
5976         default: fail(ctx, "unknown texture type"); return;
5977     } // switch
5978 
5979     if (texldd)
5980     {
5981         output_line(ctx, "%s%s, %s, %s, %s, texture[%d], %s;", opcode, dst,
5982                     src0, src2, src3, regnum, ttype);
5983     } // if
5984     else
5985     {
5986         output_line(ctx, "%s%s, %s, texture[%d], %s;", opcode, dst, src0,
5987                     regnum, ttype);
5988     } // else
5989 } // arb1_texld
5990 
5991 
5992 static void emit_ARB1_TEXLDD(Context *ctx)
5993 {
5994     // With GL_NV_fragment_program2, we can use the TXD opcode.
5995     //  In stock arb1, we can settle for a standard texld, which isn't
5996     //  perfect, but oh well.
5997     if (support_nv2(ctx))
5998         arb1_texld(ctx, "TXD", 1);
5999     else
6000         arb1_texld(ctx, "TEX", 0);
6001 } // emit_ARB1_TEXLDD
6002 
6003 
6004 static void emit_ARB1_TEXLDL(Context *ctx)
6005 {
6006     if ((shader_is_vertex(ctx)) && (!support_nv3(ctx)))
6007     {
6008         failf(ctx, "Vertex shader TEXLDL unsupported in %s profile",
6009               ctx->profile->name);
6010         return;
6011     } // if
6012 
6013     else if ((shader_is_pixel(ctx)) && (!support_nv2(ctx)))
6014     {
6015         failf(ctx, "Pixel shader TEXLDL unsupported in %s profile",
6016               ctx->profile->name);
6017         return;
6018     } // if
6019 
6020     // !!! FIXME: this doesn't map exactly to TEXLDL. Review this.
6021     arb1_texld(ctx, "TXL", 0);
6022 } // emit_ARB1_TEXLDL
6023 
6024 
// NOTE(review): the macro is defined outside this section; presumably each
//  line generates an emit_ARB1_<opcode>() stub that reports the opcode as
//  unimplemented -- confirm against the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKP)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKC)
6027 
6028 static void emit_ARB1_IFC(Context *ctx)
6029 {
6030     if (support_nv2(ctx))
6031     {
6032         static const char *comps[] = {
6033             "", "SGTC", "SEQC", "SGEC", "SGTC", "SNEC", "SLEC"
6034         };
6035 
6036         if (ctx->instruction_controls >= STATICARRAYLEN(comps))
6037         {
6038             fail(ctx, "unknown comparison control");
6039             return;
6040         } // if
6041 
6042         char src0[64];
6043         char src1[64];
6044         char scratch[64];
6045 
6046         const char *comp = comps[ctx->instruction_controls];
6047         get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
6048         get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
6049         allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
6050         output_line(ctx, "%s %s.x, %s, %s;", comp, scratch, src0, src1);
6051         nv2_if(ctx);
6052     } // if
6053 
6054     else  // stock ARB1 has no branching.
6055     {
6056         failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
6057     } // else
6058 } // emit_ARB1_IFC
6059 
6060 
// NOTE(review): presumably generates an emit_ARB1_SETP() stub that reports
//  the opcode as unimplemented -- confirm against the macro.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(SETP)
6062 
6063 static void emit_ARB1_DEF(Context *ctx)
6064 {
6065     const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?
6066     char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
6067     char val0[32]; floatstr(ctx, val0, sizeof (val0), val[0], 1);
6068     char val1[32]; floatstr(ctx, val1, sizeof (val1), val[1], 1);
6069     char val2[32]; floatstr(ctx, val2, sizeof (val2), val[2], 1);
6070     char val3[32]; floatstr(ctx, val3, sizeof (val3), val[3], 1);
6071 
6072     push_output(ctx, &ctx->globals);
6073     output_line(ctx, "PARAM %s = { %s, %s, %s, %s };",
6074                 dst, val0, val1, val2, val3);
6075     pop_output(ctx);
6076 } // emit_ARB1_DEF
6077 
6078 static void emit_ARB1_DEFI(Context *ctx)
6079 {
6080     char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
6081     const int32 *x = (const int32 *) ctx->dwords;
6082     push_output(ctx, &ctx->globals);
6083     output_line(ctx, "PARAM %s = { %d, %d, %d, %d };",
6084                 dst, (int) x[0], (int) x[1], (int) x[2], (int) x[3]);
6085     pop_output(ctx);
6086 } // emit_ARB1_DEFI
6087 
6088 static void emit_ARB1_DEFB(Context *ctx)
6089 {
6090     char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
6091     push_output(ctx, &ctx->globals);
6092     output_line(ctx, "PARAM %s = %d;", dst, ctx->dwords[0] ? 1 : 0);
6093     pop_output(ctx);
6094 } // emit_ARB1_DEFB
6095 
6096 static void emit_ARB1_DCL(Context *ctx)
6097 {
6098     // no-op. We do this in our emit_attribute() and emit_uniform().
6099 } // emit_ARB1_DCL
6100 
// NOTE(review): presumably generates an emit_ARB1_TEXCRD() stub that
//  reports the opcode as unimplemented -- confirm against the macro.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
6102 
6103 static void emit_ARB1_TEXLD(Context *ctx)
6104 {
6105     if (!shader_version_atleast(ctx, 1, 4))
6106     {
6107         arb1_texld(ctx, "TEX", 0);
6108         return;
6109     } // if
6110 
6111     else if (!shader_version_atleast(ctx, 2, 0))
6112     {
6113         // ps_1_4 is different, too!
6114         fail(ctx, "TEXLD == Shader Model 1.4 unimplemented.");  // !!! FIXME
6115         return;
6116     } // if
6117 
6118     // !!! FIXME: do texldb and texldp map between OpenGL and D3D correctly?
6119     if (ctx->instruction_controls == CONTROL_TEXLD)
6120         arb1_texld(ctx, "TEX", 0);
6121     else if (ctx->instruction_controls == CONTROL_TEXLDP)
6122         arb1_texld(ctx, "TXP", 0);
6123     else if (ctx->instruction_controls == CONTROL_TEXLDB)
6124         arb1_texld(ctx, "TXB", 0);
6125 } // emit_ARB1_TEXLD
6126 
6127 #endif  // SUPPORT_PROFILE_ARB1
6128 
6129 
6130 #if !AT_LEAST_ONE_PROFILE
6131 #error No profiles are supported. Fix your build.
6132 #endif
6133 
// Builds one initializer for the profiles[] table from a profile suffix:
//  the MOJOSHADER_PROFILE_<prof> name constant followed by that profile's
//  emit_<prof>_* and get_<prof>_* functions, in the order listed here. The
//  expansion ends with a comma so uses can be stacked in the initializer.
#define DEFINE_PROFILE(prof) { \
    MOJOSHADER_PROFILE_##prof, \
    emit_##prof##_start, \
    emit_##prof##_end, \
    emit_##prof##_phase, \
    emit_##prof##_global, \
    emit_##prof##_array, \
    emit_##prof##_const_array, \
    emit_##prof##_uniform, \
    emit_##prof##_sampler, \
    emit_##prof##_attribute, \
    emit_##prof##_finalize, \
    get_##prof##_varname, \
    get_##prof##_const_array_varname, \
},

// One entry per profile enabled at build time; an entry is only present
//  when its SUPPORT_PROFILE_* switch is non-zero.
static const Profile profiles[] =
{
#if SUPPORT_PROFILE_D3D
    DEFINE_PROFILE(D3D)
#endif
#if SUPPORT_PROFILE_BYTECODE
    DEFINE_PROFILE(BYTECODE)
#endif
#if SUPPORT_PROFILE_GLSL
    DEFINE_PROFILE(GLSL)
#endif
#if SUPPORT_PROFILE_ARB1
    DEFINE_PROFILE(ARB1)
#endif
};

#undef DEFINE_PROFILE
6167 
// This is for profiles that extend other profiles: each entry pairs the
//  name of an extending profile (`from`) with the name of the profile it
//  is built on (`to`).
// NOTE(review): the lookup that consumes this table is outside this
//  section; presumably `to` selects the profiles[] entry used for `from`.
static const struct { const char *from; const char *to; } profileMap[] =
{
    { MOJOSHADER_PROFILE_GLSL120, MOJOSHADER_PROFILE_GLSL },
    { MOJOSHADER_PROFILE_NV2, MOJOSHADER_PROFILE_ARB1 },
    { MOJOSHADER_PROFILE_NV3, MOJOSHADER_PROFILE_ARB1 },
    { MOJOSHADER_PROFILE_NV4, MOJOSHADER_PROFILE_ARB1 },
};
6176 
6177 
// Expands to a brace-enclosed list with one PROFILE_EMITTER_<profile>(op)
//  item per profile, for opcode `op`.
// The PROFILE_EMITTER_* items MUST be in the same order as profiles[]!
#define PROFILE_EMITTERS(op) { \
     PROFILE_EMITTER_D3D(op) \
     PROFILE_EMITTER_BYTECODE(op) \
     PROFILE_EMITTER_GLSL(op) \
     PROFILE_EMITTER_ARB1(op) \
}
6185 
// Decode the destination parameter token at the current stream position
//  into `info`, validate it, and move the stream past it.
//
// Returns 0 if there are no tokens left (nothing is consumed), 2 if the
//  token asks for relative addressing (which is always reported as a
//  failure here), and 1 otherwise. Validation problems are reported through
//  fail() and do not change the return value.
static int parse_destination_token(Context *ctx, DestArgInfo *info)
{
    // !!! FIXME: recheck against the spec for ranges (like RASTOUT values, etc).
    if (ctx->tokencount == 0)
    {
        fail(ctx, "Out of tokens in destination parameter");
        return 0;
    } // if

    const uint32 token = SWAP32(*(ctx->tokens));
    const int reserved1 = (int) ((token >> 14) & 0x3); // bits 14 through 15
    const int reserved2 = (int) ((token >> 31) & 0x1); // bit 31
    const uint32 regtype_lo = (token >> 28) & 0x7;     // bits 28 through 30
    const uint32 regtype_hi = (token >> 8) & 0x18;     // bits 11 and 12

    info->token = ctx->tokens;
    info->regnum = (int) (token & 0x7ff);  // bits 0 through 10
    info->relative = (int) ((token >> 13) & 0x1); // bit 13
    info->orig_writemask = (int) ((token >> 16) & 0xF); // bits 16 through 19
    info->result_mod = (int) ((token >> 20) & 0xF); // bits 20 through 23
    info->result_shift = (int) ((token >> 24) & 0xF); // bits 24 through 27
    info->regtype = (RegisterType) (regtype_lo | regtype_hi);

    // scalar registers only ever write x; everything else honors the
    //  writemask stored in the token (bits 16 through 19).
    const int writemask =
        isscalar(ctx, ctx->shader_type, info->regtype, info->regnum) ?
            0x1 : info->orig_writemask;
    set_dstarg_writemask(info, writemask);

    // all the REG_TYPE_CONSTx types are the same register type, it's just
    //  split up so its regnum can be > 2047 in the bytecode. Clean it up.
    int const_bank = 0;
    if (info->regtype == REG_TYPE_CONST2)
        const_bank = 1;
    else if (info->regtype == REG_TYPE_CONST3)
        const_bank = 2;
    else if (info->regtype == REG_TYPE_CONST4)
        const_bank = 3;

    if (const_bank != 0)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += const_bank * 2048;
    } // if

    // swallow token for now, for multiple calls in a row.
    adjust_token_position(ctx, 1);

    if (reserved1 != 0x0)
        fail(ctx, "Reserved bit #1 in destination token must be zero");

    if (reserved2 != 0x1)
        fail(ctx, "Reserved bit #2 in destination token must be one");

    if (info->relative)
    {
        if (!shader_is_vertex(ctx))
            fail(ctx, "Relative addressing in non-vertex shader");
        if (!shader_version_atleast(ctx, 3, 0))
            fail(ctx, "Relative addressing in vertex shader version < 3.0");
        if ((!ctx->ctab.have_ctab) && (!ctx->ignores_ctab))
        {
            // it's hard to do this efficiently without!
            fail(ctx, "relative addressing unsupported without a CTAB");
        } // if

        // !!! FIXME: I don't have a shader that has a relative dest currently.
        fail(ctx, "Relative addressing of dest tokens is unsupported");
        return 2;
    } // if

    const int shift = info->result_shift;
    if (shift != 0)
    {
        const int shift_in_range = ((shift >= 1) && (shift <= 3)) ||
                                   ((shift >= 0xD) && (shift <= 0xF));

        if (!shader_is_pixel(ctx))
            fail(ctx, "Result shift scale in non-pixel shader");
        if (shader_version_atleast(ctx, 2, 0))
            fail(ctx, "Result shift scale in pixel shader version >= 2.0");
        if (!shift_in_range)
            fail(ctx, "Result shift scale isn't 1 to 3, or 13 to 15.");
    } // if

    // Partial precision (pixel shaders only)
    if ((info->result_mod & MOD_PP) && (!shader_is_pixel(ctx)))
        fail(ctx, "Partial precision result mod in non-pixel shader");

    // Centroid (pixel shaders only)
    if (info->result_mod & MOD_CENTROID)
    {
        if (!shader_is_pixel(ctx))
            fail(ctx, "Centroid result mod in non-pixel shader");
        else if (!ctx->centroid_allowed)  // only on DCL opcodes!
            fail(ctx, "Centroid modifier not allowed here");
    } // if

    if ((info->regtype < 0) || (info->regtype > REG_TYPE_MAX))
        fail(ctx, "Register type is out of range");

    // only record the register as written when nothing has failed.
    if (!isfail(ctx))
        set_used_register(ctx, info->regtype, info->regnum, 1);

    return 1;
} // parse_destination_token
6292 
6293 
// Sort ctx->constants by register index and register every run of two or
//  more consecutively-indexed float constants as an array in
//  ctx->variables.
//
// Only the first call does any work; later calls return immediately. The
//  constants linked list is relinked in sorted order as a side effect.
//  Isolated constants (runs of length one) are not added to
//  ctx->variables.
static void determine_constants_arrays(Context *ctx)
{
    // Only process this stuff once. This is called after all DEF* opcodes
    //  could have been parsed.
    if (ctx->determined_constants_arrays)
        return;

    ctx->determined_constants_arrays = 1;

    if (ctx->constant_count <= 1)
        return;  // nothing to sort or group.

    // Sort the linked list into an array for easier tapdancing...
    //  (one extra slot holds a NULL terminator.)
    ConstantsList **array = (ConstantsList **) alloca(sizeof (ConstantsList *) * (ctx->constant_count + 1));
    ConstantsList *item = ctx->constants;
    int i;

    for (i = 0; i < ctx->constant_count; i++)
    {
        if (item == NULL)
        {
            fail(ctx, "BUG: mismatched constant list and count");
            return;
        } // if

        array[i] = item;
        item = item->next;
    } // for

    array[ctx->constant_count] = NULL;

    // bubble sort ftw.
    int sorted;
    do
    {
        sorted = 1;
        for (i = 0; i < ctx->constant_count-1; i++)
        {
            if (array[i]->constant.index > array[i+1]->constant.index)
            {
                ConstantsList *tmp = array[i];
                array[i] = array[i+1];
                array[i+1] = tmp;
                sorted = 0;
            } // if
        } // for
    } while (!sorted);

    // okay, sorted. While we're here, let's redo the linked list in order...
    //  (the NULL terminator becomes the last item's next pointer.)
    for (i = 0; i < ctx->constant_count; i++)
        array[i]->next = array[i+1];
    ctx->constants = array[0];

    // now figure out the groupings of constants and add to ctx->variables...
    int start = -1;  // array position where the current run began.
    int prev = -1;   // array position of the last float constant seen.
    int count = 0;   // how many constants have extended the current run.
    const int hi = ctx->constant_count;
    for (i = 0; i <= hi; i++)  // <= so the NULL terminator flushes the last run.
    {
        if (array[i] && (array[i]->constant.type != MOJOSHADER_UNIFORM_FLOAT))
            continue;  // we only care about REG_TYPE_CONST for array groups.

        if (start == -1)
        {
            prev = start = i;  // first REG_TYPE_CONST we've seen. Mark it!
            continue;
        } // if

        // not a match (or last item in the array)...see if we had a
        //  contiguous set before this point...
        if ( (array[i]) && (array[i]->constant.index == (array[prev]->constant.index + 1)) )
            count++;
        else
        {
            if (count > 0)  // multiple constants in the set?
            {
                VariableList *var;
                var = (VariableList *) Malloc(ctx, sizeof (VariableList));
                if (var == NULL)
                    break;

                var->type = MOJOSHADER_UNIFORM_FLOAT;
                var->index = array[start]->constant.index;
                var->count = (array[prev]->constant.index - var->index) + 1;
                var->constant = array[start];
                var->used = 0;
                var->emit_position = -1;
                var->next = ctx->variables;
                ctx->variables = var;
            } // if

            start = i;   // set this as new start of sequence.

            // a new run has no extensions yet. Without this reset, the
            //  count left over from an earlier run made every later
            //  isolated constant get registered as a one-element array.
            count = 0;
        } // else

        prev = i;
    } // for
} // determine_constants_arrays
6392 
6393 
adjust_swizzle(const Context * ctx,const RegisterType regtype,const int regnum,const int swizzle)6394 static int adjust_swizzle(const Context *ctx, const RegisterType regtype,
6395                           const int regnum, const int swizzle)
6396 {
6397     if (regtype != REG_TYPE_INPUT)  // !!! FIXME: maybe lift this later?
6398         return swizzle;
6399     else if (ctx->swizzles_count == 0)
6400         return swizzle;
6401 
6402     const RegisterList *reg = reglist_find(&ctx->attributes, regtype, regnum);
6403     if (reg == NULL)
6404         return swizzle;
6405 
6406     size_t i;
6407     for (i = 0; i < ctx->swizzles_count; i++)
6408     {
6409         const MOJOSHADER_swizzle *swiz = &ctx->swizzles[i];
6410         if ((swiz->usage == reg->usage) && (swiz->index == reg->index))
6411         {
6412             return ( (((int)(swiz->swizzles[((swizzle >> 0) & 0x3)])) << 0) |
6413                      (((int)(swiz->swizzles[((swizzle >> 2) & 0x3)])) << 2) |
6414                      (((int)(swiz->swizzles[((swizzle >> 4) & 0x3)])) << 4) |
6415                      (((int)(swiz->swizzles[((swizzle >> 6) & 0x3)])) << 6) );
6416         } // if
6417     } // for
6418 
6419     return swizzle;
6420 } // adjust_swizzle
6421 
6422 
// Parse one source parameter token (and the relative-addressing token that
//  follows it, when bit 13 is set) from the token stream into `info`,
//  advancing the stream past everything consumed.
//
// Returns the number of tokens consumed: 0 if the stream was already empty,
//  1 for a plain source token, 2 when a relative-addressing token was
//  parsed too.
//
// Validation problems are reported through fail() and parsing carries on,
//  so `info` is still filled in on a failure. If nothing has failed by the
//  end, the register is marked as used and a read of a never-written temp
//  register is reported.
static int parse_source_token(Context *ctx, SourceArgInfo *info)
{
    int retval = 1;

    if (ctx->tokencount == 0)
    {
        fail(ctx, "Out of tokens in source parameter");
        return 0;
    } // if

    const uint32 token = SWAP32(*(ctx->tokens));
    const int reserved1 = (int) ((token >> 14) & 0x3); // bits 14 through 15
    const int reserved2 = (int) ((token >> 31) & 0x1); // bit 31

    info->token = ctx->tokens;
    info->regnum = (int) (token & 0x7ff);  // bits 0 through 10
    info->relative = (int) ((token >> 13) & 0x1); // bit 13
    const int swizzle = (int) ((token >> 16) & 0xFF); // bits 16 through 23
    info->src_mod = (SourceMod) ((token >> 24) & 0xF); // bits 24 through 27
    info->regtype = (RegisterType) (((token >> 28) & 0x7) | ((token >> 8) & 0x18));  // bits 28-30, 11-12

    // all the REG_TYPE_CONSTx types are the same register type, it's just
    //  split up so its regnum can be > 2047 in the bytecode. Clean it up.
    if (info->regtype == REG_TYPE_CONST2)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += 2048;
    } // if
    else if (info->regtype == REG_TYPE_CONST3)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += 4096;
    } // else if
    else if (info->regtype == REG_TYPE_CONST4)
    {
        info->regtype = REG_TYPE_CONST;
        info->regnum += 6144;
    } // else if

    // the swizzle is run through adjust_swizzle() first, then split out into
    //  its four 2-bit component selectors.
    info->swizzle = adjust_swizzle(ctx, info->regtype, info->regnum, swizzle);
    info->swizzle_x = ((info->swizzle >> 0) & 0x3);
    info->swizzle_y = ((info->swizzle >> 2) & 0x3);
    info->swizzle_z = ((info->swizzle >> 4) & 0x3);
    info->swizzle_w = ((info->swizzle >> 6) & 0x3);

    // swallow token for now, for multiple calls in a row.
    adjust_token_position(ctx, 1);

    if (reserved1 != 0x0)
        fail(ctx, "Reserved bits #1 in source token must be zero");

    if (reserved2 != 0x1)
        fail(ctx, "Reserved bit #2 in source token must be one");

    // the relative-addressing token is the next one in the stream; make sure
    //  there actually is one before reading it below.
    if ((info->relative) && (ctx->tokencount == 0))
    {
        fail(ctx, "Out of tokens in relative source parameter");
        info->relative = 0;  // don't try to process it.
    } // if

    if (info->relative)
    {
        if ( (shader_is_pixel(ctx)) && (!shader_version_atleast(ctx, 3, 0)) )
            fail(ctx, "Relative addressing in pixel shader version < 3.0");

        const uint32 reltoken = SWAP32(*(ctx->tokens));
        // swallow token for now, for multiple calls in a row.
        adjust_token_position(ctx, 1);

        // same bit layout as the main token: swizzle in bits 16-23, register
        //  number in bits 0-10, register type in bits 28-30 and 11-12.
        const int relswiz = (int) ((reltoken >> 16) & 0xFF);
        info->relative_regnum = (int) (reltoken & 0x7ff);
        info->relative_regtype = (RegisterType)
                                    (((reltoken >> 28) & 0x7) |
                                    ((reltoken >> 8) & 0x18));

        if (((reltoken >> 31) & 0x1) == 0)
            fail(ctx, "bit #31 in relative address must be set");

        if ((reltoken & 0xF00E000) != 0)  // unused bits.
            fail(ctx, "relative address reserved bit must be zero");

        // only the loop counter and the address register may be used as the
        //  index register.
        switch (info->relative_regtype)
        {
            case REG_TYPE_LOOP:
            case REG_TYPE_ADDRESS:
                break;
            default:
                fail(ctx, "invalid register for relative address");
                break;
        } // switch

        if (info->relative_regnum != 0)  // true for now.
            fail(ctx, "invalid register for relative address");

        if (!replicate_swizzle(relswiz))
            fail(ctx, "relative address needs replicate swizzle");

        // the low two bits of the swizzle select the component used as index.
        info->relative_component = (relswiz & 0x3);

        if (info->regtype == REG_TYPE_INPUT)
        {
            if ( (shader_is_pixel(ctx)) || (!shader_version_atleast(ctx, 3, 0)) )
                fail(ctx, "relative addressing of input registers not supported in this shader model");
            ctx->have_relative_input_registers = 1;
        } // if
        else if (info->regtype == REG_TYPE_CONST)
        {
            // figure out what array we're in...
            if (!ctx->ignores_ctab)
            {
                if (!ctx->ctab.have_ctab)  // hard to do efficiently without!
                    fail(ctx, "relative addressing unsupported without a CTAB");
                else
                {
                    determine_constants_arrays(ctx);

                    // look for the variable whose register range
                    //  [index, index+count) contains the base register.
                    VariableList *var;
                    const int reltarget = info->regnum;
                    for (var = ctx->variables; var != NULL; var = var->next)
                    {
                        const int lo = var->index;
                        if ( (reltarget >= lo) && (reltarget < (lo + var->count)) )
                            break;  // match!
                    } // for

                    if (var == NULL)
                        fail(ctx, "relative addressing of indeterminate array");
                    else
                    {
                        var->used = 1;
                        info->relative_array = var;
                        set_used_register(ctx, info->relative_regtype, info->relative_regnum, 0);
                    } // else
                } // else
            } // if
        } // else if
        else
        {
            fail(ctx, "relative addressing of invalid register");
        } // else

        retval++;  // account for the relative-addressing token.
    } // if

    switch (info->src_mod)
    {
        case SRCMOD_NONE:
        case SRCMOD_ABSNEGATE:
        case SRCMOD_ABS:
        case SRCMOD_NEGATE:
            break; // okay in any shader model.

        // apparently these are only legal in Shader Model 1.x ...
        case SRCMOD_BIASNEGATE:
        case SRCMOD_BIAS:
        case SRCMOD_SIGNNEGATE:
        case SRCMOD_SIGN:
        case SRCMOD_COMPLEMENT:
        case SRCMOD_X2NEGATE:
        case SRCMOD_X2:
        case SRCMOD_DZ:
        case SRCMOD_DW:
            if (shader_version_atleast(ctx, 2, 0))
                fail(ctx, "illegal source mod for this Shader Model.");
            break;

        case SRCMOD_NOT:  // !!! FIXME: I _think_ this is right...
            if (shader_version_atleast(ctx, 2, 0))
            {
                if (info->regtype != REG_TYPE_PREDICATE)
                    fail(ctx, "NOT only allowed on predicate register.");
            } // if
            break;

        default:
            fail(ctx, "Unknown source modifier");
    } // switch

    // !!! FIXME: docs say this for sm3 ... check these!
    //  "The negate modifier cannot be used on second source register of these
    //   instructions: m3x2 - ps, m3x3 - ps, m3x4 - ps, m4x3 - ps, and
    //   m4x4 - ps."
    //  "If any version 3 shader reads from one or more constant float
    //   registers (c#), one of the following must be true.
    //    All of the constant floating-point registers must use the abs modifier.
    //    None of the constant floating-point registers can use the abs modifier.

    // only record register usage when nothing has failed so far.
    if (!isfail(ctx))
    {
        RegisterList *reg;
        reg = set_used_register(ctx, info->regtype, info->regnum, 0);
        // !!! FIXME: this test passes if you write to the register
        // !!! FIXME:  in this same instruction, because we parse the
        // !!! FIXME:  destination token first.
        // !!! FIXME: Microsoft's shader validation explicitly checks temp
        // !!! FIXME:  registers for this...do they check other writable ones?
        if ((info->regtype == REG_TYPE_TEMP) && (reg) && (!reg->written))
            failf(ctx, "Temp register r%d used uninitialized", info->regnum);
    } // if

    return retval;
} // parse_source_token
6625 
6626 
// Parse the predicate register token of a predicated instruction into
//  ctx->predicate_arg and check that it really is a predicate register with
//  an acceptable modifier and swizzle and no relative addressing.
// Always returns 1; problems are reported through fail().
static int parse_predicated_token(Context *ctx)
{
    SourceArgInfo *pred = &ctx->predicate_arg;

    parse_source_token(ctx, pred);

    if (pred->regtype != REG_TYPE_PREDICATE)
        fail(ctx, "Predicated instruction but not predicate register!");

    switch (pred->src_mod)
    {
        case SRCMOD_NONE:
        case SRCMOD_NOT:
            break;  // the only two modifiers accepted here.
        default:
            fail(ctx, "Predicated instruction register is not NONE or NOT");
            break;
    } // switch

    if ( !(no_swizzle(pred->swizzle) || replicate_swizzle(pred->swizzle)) )
        fail(ctx, "Predicated instruction register has wrong swizzle");

    if (pred->relative != 0)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in predicated token");

    return 1;
} // parse_predicated_token
6642 
6643 
// Argument parser for instructions that take no arguments: nothing is read
//  from the token stream and the result is always 1.
static int parse_args_NULL(Context *ctx)
{
    const int tokens_used = 1;
    return tokens_used;
} // parse_args_NULL
6648 
6649 
// Parse the arguments of a DEF instruction: a destination token naming a
//  float constant register, followed by four dwords of data which are
//  byteswapped into ctx->dwords[0..3].
// Always returns 6 (the sibling parse_args_* functions return 1 plus the
//  number of tokens they parsed; here that is 1 + destination + 4 values).
// Problems are reported through fail().
static int parse_args_DEF(Context *ctx)
{
    parse_destination_token(ctx, &ctx->dest_arg);
    if (ctx->dest_arg.regtype != REG_TYPE_CONST)
        fail(ctx, "DEF using non-CONST register");
    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in DEF");

    if (ctx->tokencount < 4)
    {
        // truncated stream: don't read past the end of the bytecode. Leave
        //  well-defined values behind instead of whatever is out there.
        fail(ctx, "Out of tokens in DEF");
        ctx->dwords[0] = 0;
        ctx->dwords[1] = 0;
        ctx->dwords[2] = 0;
        ctx->dwords[3] = 0;
    } // if
    else
    {
        ctx->dwords[0] = SWAP32(ctx->tokens[0]);
        ctx->dwords[1] = SWAP32(ctx->tokens[1]);
        ctx->dwords[2] = SWAP32(ctx->tokens[2]);
        ctx->dwords[3] = SWAP32(ctx->tokens[3]);
    } // else

    return 6;
} // parse_args_DEF
6665 
6666 
// Parse the arguments of a DEFI instruction: a destination token naming an
//  integer constant register, followed by four dwords of data which are
//  byteswapped into ctx->dwords[0..3].
// Always returns 6 (the sibling parse_args_* functions return 1 plus the
//  number of tokens they parsed; here that is 1 + destination + 4 values).
// Problems are reported through fail().
static int parse_args_DEFI(Context *ctx)
{
    parse_destination_token(ctx, &ctx->dest_arg);
    if (ctx->dest_arg.regtype != REG_TYPE_CONSTINT)
        fail(ctx, "DEFI using non-CONSTINT register");
    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in DEFI");

    if (ctx->tokencount < 4)
    {
        // truncated stream: don't read past the end of the bytecode. Leave
        //  well-defined values behind instead of whatever is out there.
        fail(ctx, "Out of tokens in DEFI");
        ctx->dwords[0] = 0;
        ctx->dwords[1] = 0;
        ctx->dwords[2] = 0;
        ctx->dwords[3] = 0;
    } // if
    else
    {
        ctx->dwords[0] = SWAP32(ctx->tokens[0]);
        ctx->dwords[1] = SWAP32(ctx->tokens[1]);
        ctx->dwords[2] = SWAP32(ctx->tokens[2]);
        ctx->dwords[3] = SWAP32(ctx->tokens[3]);
    } // else

    return 6;
} // parse_args_DEFI
6682 
6683 
// Parse the arguments of a DEFB instruction: a destination token naming a
//  boolean constant register, followed by one dword which is normalized to
//  0 or 1 and stored in ctx->dwords[0].
// Always returns 3 (the sibling parse_args_* functions return 1 plus the
//  number of tokens they parsed; here that is 1 + destination + 1 value).
// Problems are reported through fail().
static int parse_args_DEFB(Context *ctx)
{
    parse_destination_token(ctx, &ctx->dest_arg);
    if (ctx->dest_arg.regtype != REG_TYPE_CONSTBOOL)
        fail(ctx, "DEFB using non-CONSTBOOL register");
    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in DEFB");

    if (ctx->tokencount == 0)
    {
        // truncated stream: don't read past the end of the bytecode.
        fail(ctx, "Out of tokens in DEFB");
        ctx->dwords[0] = 0;
    } // if
    else
    {
        // no byteswap needed: only zero vs. non-zero matters.
        ctx->dwords[0] = *(ctx->tokens) ? 1 : 0;
    } // else

    return 3;
} // parse_args_DEFB
6696 
6697 
valid_texture_type(const uint32 ttype)6698 static int valid_texture_type(const uint32 ttype)
6699 {
6700     switch ((const TextureType) ttype)
6701     {
6702         case TEXTURE_TYPE_2D:
6703         case TEXTURE_TYPE_CUBE:
6704         case TEXTURE_TYPE_VOLUME:
6705             return 1;  // it's okay.
6706     } // switch
6707 
6708     return 0;
6709 } // valid_texture_type
6710 
6711 
6712 // !!! FIXME: this function is kind of a mess.
// Parse the arguments of a DCL instruction: one dword describing what is
//  being declared, followed by a destination token naming the register.
//
// How the first dword is interpreted depends on the shader type/version and
//  on the register type in the destination token: it is either a usage
//  (low bits) plus usage index (bits 16 and up), or a sampler texture type
//  (bits 27 and up). The decoded values are left in ctx->dwords[0] and
//  ctx->dwords[1], which is where state_DCL() picks them up.
//
// Each branch also chooses a mask of bits that must be zero in the dword;
//  that mask is checked once at the end. Always returns 3; problems are
//  reported through fail().
//
// NOTE(review): the first dword is read without checking ctx->tokencount,
//  unlike parse_source_token() -- confirm the caller guarantees a token.
static int parse_args_DCL(Context *ctx)
{
    int unsupported = 0;
    const uint32 token = SWAP32(*(ctx->tokens));
    const int reserved1 = (int) ((token >> 31) & 0x1); // bit 31
    uint32 reserved_mask = 0x00000000;

    if (reserved1 != 0x1)
        fail(ctx, "Bit #31 in DCL token must be one");

    // centroid_allowed is only raised for the duration of the destination
    //  token parse.
    ctx->centroid_allowed = 1;
    adjust_token_position(ctx, 1);
    parse_destination_token(ctx, &ctx->dest_arg);
    ctx->centroid_allowed = 0;

    if (ctx->dest_arg.result_shift != 0)  // I'm pretty sure this is illegal...?
        fail(ctx, "shift scale in DCL");
    if (ctx->dest_arg.relative)  // I'm pretty sure this is illegal...?
        fail(ctx, "relative addressing in DCL");

    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;

    // pixel shaders, version 3.0 and up...
    if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
    {
        if (regtype == REG_TYPE_INPUT)
        {
            // NOTE(review): usage is taken from bits 0-3 only, but the
            //  reserved mask below leaves bit 4 unchecked as well, so a set
            //  bit 4 is silently ignored -- confirm this is intended.
            const uint32 usage = (token & 0xF);
            const uint32 index = ((token >> 16) & 0xF);
            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // if

        else if (regtype == REG_TYPE_MISCTYPE)
        {
            const MiscTypeType mt = (MiscTypeType) regnum;
            if (mt == MISCTYPE_TYPE_POSITION)
                reserved_mask = 0x7FFFFFFF;
            else if (mt == MISCTYPE_TYPE_FACE)
            {
                reserved_mask = 0x7FFFFFFF;
                if (!writemask_xyzw(ctx->dest_arg.orig_writemask))
                    fail(ctx, "DCL face writemask must be full");
                if (ctx->dest_arg.result_mod != 0)
                    fail(ctx, "DCL face result modifier must be zero");
                if (ctx->dest_arg.result_shift != 0)
                    fail(ctx, "DCL face shift scale must be zero");
            } // else if
            else
            {
                unsupported = 1;
            } // else

            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_UNKNOWN;
            ctx->dwords[1] = 0;
        } // else if

        else if (regtype == REG_TYPE_TEXTURE)
        {
            const uint32 usage = (token & 0xF);
            const uint32 index = ((token >> 16) & 0xF);
            if (usage == MOJOSHADER_USAGE_TEXCOORD)
            {
                if (index > 7)
                    fail(ctx, "DCL texcoord usage must have 0-7 index");
            } // if
            else if (usage == MOJOSHADER_USAGE_COLOR)
            {
                if (index != 0)
                    fail(ctx, "DCL color usage must have 0 index");
            } // else if
            else
            {
                fail(ctx, "Invalid DCL texture usage");
            } // else

            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // else if

        else if (regtype == REG_TYPE_SAMPLER)
        {
            // texture type lives in bits 27-30; bits 0-26 must be zero.
            const uint32 ttype = ((token >> 27) & 0xF);
            if (!valid_texture_type(ttype))
                fail(ctx, "unknown sampler texture type");
            reserved_mask = 0x7FFFFFF;
            ctx->dwords[0] = ttype;
        } // else if

        else
        {
            unsupported = 1;
        } // else
    } // if

    // pixel shaders, version 2.0 up to (but not including) 3.0: usage is
    //  implied by the register type and the index is the register number.
    else if ( (shader_is_pixel(ctx)) && (shader_version_atleast(ctx, 2, 0)) )
    {
        if (regtype == REG_TYPE_INPUT)
        {
            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_COLOR;
            ctx->dwords[1] = regnum;
            reserved_mask = 0x7FFFFFFF;
        } // if
        else if (regtype == REG_TYPE_TEXTURE)
        {
            ctx->dwords[0] = (uint32) MOJOSHADER_USAGE_TEXCOORD;
            ctx->dwords[1] = regnum;
            reserved_mask = 0x7FFFFFFF;
        } // else if
        else if (regtype == REG_TYPE_SAMPLER)
        {
            // texture type lives in bits 27-30; bits 0-26 must be zero.
            const uint32 ttype = ((token >> 27) & 0xF);
            if (!valid_texture_type(ttype))
                fail(ctx, "unknown sampler texture type");
            reserved_mask = 0x7FFFFFF;
            ctx->dwords[0] = ttype;
        } // else if
        else
        {
            unsupported = 1;
        } // else
    } // else if

    // vertex shaders, version 3.0 and up: inputs and outputs can be declared.
    else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 3, 0)) )
    {
        if ((regtype == REG_TYPE_INPUT) || (regtype == REG_TYPE_OUTPUT))
        {
            const uint32 usage = (token & 0xF);
            const uint32 index = ((token >> 16) & 0xF);
            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // if
        else
        {
            unsupported = 1;
        } // else
    } // else if

    // vertex shaders, version 1.1 up to (but not including) 3.0: inputs only.
    else if ( (shader_is_vertex(ctx)) && (shader_version_atleast(ctx, 1, 1)) )
    {
        if (regtype == REG_TYPE_INPUT)
        {
            const uint32 usage = (token & 0xF);
            const uint32 index = ((token >> 16) & 0xF);
            reserved_mask = 0x7FF0FFE0;
            ctx->dwords[0] = usage;
            ctx->dwords[1] = index;
        } // if
        else
        {
            unsupported = 1;
        } // else
    } // else if

    else
    {
        unsupported = 1;
    } // else

    if (unsupported)
        fail(ctx, "invalid DCL register type for this shader model");

    // reserved_mask is still zero if no branch above set it, in which case
    //  this check can't fire.
    if ((token & reserved_mask) != 0)
        fail(ctx, "reserved bits in DCL dword aren't zero");

    return 3;
} // parse_args_DCL
6882 
6883 
// Argument parser for instructions taking a single destination register.
// Returns 1 plus whatever parse_destination_token() reports.
static int parse_args_D(Context *ctx)
{
    const int dst_tokens = parse_destination_token(ctx, &ctx->dest_arg);
    return 1 + dst_tokens;
} // parse_args_D
6890 
6891 
// Argument parser for instructions taking a single source register.
// Returns 1 plus whatever parse_source_token() reports.
static int parse_args_S(Context *ctx)
{
    const int src_tokens = parse_source_token(ctx, &ctx->source_args[0]);
    return 1 + src_tokens;
} // parse_args_S
6898 
6899 
// Argument parser for instructions taking two source registers, parsed in
//  order into ctx->source_args[0] and [1].
// Returns 1 plus the sum of what the token parsers report.
static int parse_args_SS(Context *ctx)
{
    int total = 1;
    int i;
    for (i = 0; i < 2; i++)
        total += parse_source_token(ctx, &ctx->source_args[i]);
    return total;
} // parse_args_SS
6907 
6908 
// Argument parser for instructions taking a destination register followed
//  by one source register.
// Returns 1 plus the sum of what the token parsers report.
static int parse_args_DS(Context *ctx)
{
    // the two calls must stay in this order: each one advances the stream.
    const int dst_tokens = parse_destination_token(ctx, &ctx->dest_arg);
    const int src_tokens = parse_source_token(ctx, &ctx->source_args[0]);
    return 1 + dst_tokens + src_tokens;
} // parse_args_DS
6916 
6917 
// Argument parser for instructions taking a destination register followed
//  by two source registers (ctx->source_args[0] and [1], in that order).
// Returns 1 plus the sum of what the token parsers report.
static int parse_args_DSS(Context *ctx)
{
    int total = 1;
    int i;

    total += parse_destination_token(ctx, &ctx->dest_arg);
    for (i = 0; i < 2; i++)
        total += parse_source_token(ctx, &ctx->source_args[i]);

    return total;
} // parse_args_DSS
6926 
6927 
// Argument parser for instructions taking a destination register followed
//  by three source registers (ctx->source_args[0] through [2], in order).
// Returns 1 plus the sum of what the token parsers report.
static int parse_args_DSSS(Context *ctx)
{
    int total = 1;
    int i;

    total += parse_destination_token(ctx, &ctx->dest_arg);
    for (i = 0; i < 3; i++)
        total += parse_source_token(ctx, &ctx->source_args[i]);

    return total;
} // parse_args_DSSS
6937 
6938 
// Argument parser for instructions taking a destination register followed
//  by four source registers (ctx->source_args[0] through [3], in order).
// Returns 1 plus the sum of what the token parsers report.
static int parse_args_DSSSS(Context *ctx)
{
    int total = 1;
    int i;

    total += parse_destination_token(ctx, &ctx->dest_arg);
    for (i = 0; i < 4; i++)
        total += parse_source_token(ctx, &ctx->source_args[i]);

    return total;
} // parse_args_DSSSS
6949 
6950 
// Argument parser for SINCOS: destination plus one source from Shader
//  Model 3.0 on, destination plus three sources before that.
static int parse_args_SINCOS(Context *ctx)
{
    if (shader_version_atleast(ctx, 3, 0))
        return parse_args_DS(ctx);

    // this opcode needs extra registers for sm2 and lower.
    return parse_args_DSSS(ctx);
} // parse_args_SINCOS
6958 
6959 
// Argument parser for TEXCRD: destination only before version 1.4,
//  destination plus one source from 1.4 on.
static int parse_args_TEXCRD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 1, 4))
        return parse_args_D(ctx);

    // added extra register in ps_1_4.
    return parse_args_DS(ctx);
} // parse_args_TEXCRD
6967 
6968 
// Argument parser for TEXLD, whose operand list changed twice:
//  destination plus two sources from version 2.0 on, destination plus one
//  source from 1.4 on, and destination only before that.
static int parse_args_TEXLD(Context *ctx)
{
    if (shader_version_atleast(ctx, 2, 0))
        return parse_args_DSS(ctx);

    return shader_version_atleast(ctx, 1, 4) ? parse_args_DS(ctx)
                                             : parse_args_D(ctx);
} // parse_args_TEXLD
6978 
6979 
6980 // State machine functions...
6981 
alloc_constant_listitem(Context * ctx)6982 static ConstantsList *alloc_constant_listitem(Context *ctx)
6983 {
6984     ConstantsList *item = (ConstantsList *) Malloc(ctx, sizeof (ConstantsList));
6985     if (item == NULL)
6986         return NULL;
6987 
6988     memset(&item->constant, '\0', sizeof (MOJOSHADER_constant));
6989     item->next = ctx->constants;
6990     ctx->constants = item;
6991     ctx->constant_count++;
6992 
6993     return item;
6994 } // alloc_constant_listitem
6995 
6996 
// State handler for DEF: record the float constant whose values are in
//  ctx->dwords as a new entry in the constants list, and mark its register
//  as defined. Fails if an instruction has already been seen or the
//  destination is not a float constant register.
static void state_DEF(Context *ctx)
{
    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;

    // !!! FIXME: fail if same register is defined twice.

    if (ctx->instruction_count != 0)
    {
        fail(ctx, "DEF token must come before any instructions");
        return;
    } // if

    if (regtype != REG_TYPE_CONST)
    {
        fail(ctx, "DEF token using invalid register");
        return;
    } // if

    ConstantsList *entry = alloc_constant_listitem(ctx);
    if (entry == NULL)
        return;  // allocation failed; nothing to record.

    entry->constant.index = regnum;
    entry->constant.type = MOJOSHADER_UNIFORM_FLOAT;
    memcpy(entry->constant.value.f, ctx->dwords,
           sizeof (entry->constant.value.f));
    set_defined_register(ctx, regtype, regnum);
} // state_DEF
7021 
// State handler for DEFI: record the integer constant whose values are in
//  ctx->dwords as a new entry in the constants list, and mark its register
//  as defined. Fails if an instruction has already been seen or the
//  destination is not an integer constant register.
static void state_DEFI(Context *ctx)
{
    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;

    // !!! FIXME: fail if same register is defined twice.

    if (ctx->instruction_count != 0)
    {
        fail(ctx, "DEFI token must come before any instructions");
        return;
    } // if

    if (regtype != REG_TYPE_CONSTINT)
    {
        fail(ctx, "DEFI token using invalid register");
        return;
    } // if

    ConstantsList *entry = alloc_constant_listitem(ctx);
    if (entry == NULL)
        return;  // allocation failed; nothing to record.

    entry->constant.index = regnum;
    entry->constant.type = MOJOSHADER_UNIFORM_INT;
    memcpy(entry->constant.value.i, ctx->dwords,
           sizeof (entry->constant.value.i));
    set_defined_register(ctx, regtype, regnum);
} // state_DEFI
7047 
// State handler for DEFB: record the boolean constant held in
//  ctx->dwords[0] as a new entry in the constants list, and mark its
//  register as defined. Fails if an instruction has already been seen or
//  the destination is not a boolean constant register.
static void state_DEFB(Context *ctx)
{
    const RegisterType regtype = ctx->dest_arg.regtype;
    const int regnum = ctx->dest_arg.regnum;

    // !!! FIXME: fail if same register is defined twice.

    if (ctx->instruction_count != 0)
    {
        fail(ctx, "DEFB token must come before any instructions");
        return;
    } // if

    if (regtype != REG_TYPE_CONSTBOOL)
    {
        fail(ctx, "DEFB token using invalid register");
        return;
    } // if

    ConstantsList *entry = alloc_constant_listitem(ctx);
    if (entry == NULL)
        return;  // allocation failed; nothing to record.

    entry->constant.index = regnum;
    entry->constant.type = MOJOSHADER_UNIFORM_BOOL;
    entry->constant.value.b = (ctx->dwords[0] != 0);  // normalize to 0/1.
    set_defined_register(ctx, regtype, regnum);
} // state_DEFB
7071 
// State handler for DCL: register what the declaration describes (a sampler
//  or an attribute, using the values parse_args_DCL() left in ctx->dwords)
//  and mark the declared register as defined.
static void state_DCL(Context *ctx)
{
    const DestArgInfo *arg = &ctx->dest_arg;
    const RegisterType regtype = arg->regtype;
    const int regnum = arg->regnum;

    // parse_args_DCL() does a lot of state checking before we get here.

    // !!! FIXME: apparently vs_3_0 can use sampler registers now.
    // !!! FIXME:  (but only s0 through s3, not all 16 of them.)

    if (ctx->instruction_count == 0)
    {
        const int is_vertex = shader_is_vertex(ctx);
        if ( (!is_vertex) && (!shader_is_pixel(ctx)) )
        {
            fail(ctx, "unsupported shader type."); // should be caught elsewhere.
            return;
        } // if

        // samplers are only handled for pixel shaders; everything else is
        //  recorded as an attribute.
        if ( (!is_vertex) && (regtype == REG_TYPE_SAMPLER) )
            add_sampler(ctx, regnum, (TextureType) ctx->dwords[0], 0);
        else
        {
            const MOJOSHADER_usage usage = (MOJOSHADER_usage) ctx->dwords[0];
            const int index = ctx->dwords[1];

            // the usage range check is only applied to vertex shaders.
            if ( (is_vertex) && (usage >= MOJOSHADER_USAGE_TOTAL) )
            {
                fail(ctx, "unknown DCL usage");
                return;
            } // if

            add_attribute_register(ctx, regtype, regnum, usage, index,
                                   arg->writemask, arg->result_mod);
        } // else
    } // if
    else
    {
        // note that the register still gets marked as defined below.
        fail(ctx, "DCL token must come before any instructions");
    } // else

    set_defined_register(ctx, regtype, regnum);
} // state_DCL
7120 
// State handler for TEXCRD: the opcode is rejected from Shader Model 2.0 on.
static void state_TEXCRD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 2, 0))
        return;  // fine in Shader Model 1.x.

    fail(ctx, "TEXCRD in Shader Model >= 2.0");  // apparently removed.
} // state_TEXCRD
7126 
// State handler for FRC: the destination may not use the saturate modifier
//  and, before Shader Model 2.0, its writemask must be .y or .xy. Only the
//  first problem found is reported.
static void state_FRC(Context *ctx)
{
    const DestArgInfo *dst = &ctx->dest_arg;

    if (dst->result_mod & MOD_SATURATE)  // according to msdn...
    {
        fail(ctx, "FRC destination can't use saturate modifier");
        return;
    } // if

    if (shader_version_atleast(ctx, 2, 0))
        return;  // no writemask restriction from 2.0 on.

    if (writemask_y(dst->writemask) || writemask_xy(dst->writemask))
        return;

    fail(ctx, "FRC writemask must be .y or .xy for shader model 1.x");
} // state_FRC
7140 
7141 
7142 // replicate the matrix registers to source args. The D3D profile will
7143 //  only use the one legitimate argument, but this saves other profiles
7144 //  from having to build this.
static void srcarg_matrix_replicate(Context *ctx, const int idx,
                                       const int rows)
{
    const SourceArgInfo *first = &ctx->source_args[idx];
    int row;

    // row 0 is the argument that was actually parsed; every further row is
    //  a copy of it in the next source_args slot, with the register number
    //  bumped by the row number, and gets marked as used.
    for (row = 1; row < rows; row++)
    {
        SourceArgInfo *copy = &ctx->source_args[idx + row];
        memcpy(copy, first, sizeof (SourceArgInfo));
        copy->regnum += row;
        set_used_register(ctx, copy->regtype, copy->regnum, 0);
    } // for
} // srcarg_matrix_replicate
7158 
// State handler for M4X4: requires a full destination writemask, then
//  expands source argument 1 into four consecutive matrix row registers.
static void state_M4X4(Context *ctx)
{
    if (!writemask_xyzw(ctx->dest_arg.writemask))
        fail(ctx, "M4X4 writemask must be full");

    // !!! FIXME: MSDN:
    //  The xyzw (default) mask is required for the destination register.
    //  Negate and swizzle modifiers are allowed for src0, but not for src1.
    //  Swizzle and negate modifiers are invalid for the src0 register.
    //  The dest and src0 registers cannot be the same.

    srcarg_matrix_replicate(ctx, 1, 4);
} // state_M4X4
7171 
// State handler for M4X3: requires an .xyz destination writemask, then
//  expands source argument 1 into three consecutive matrix row registers.
static void state_M4X3(Context *ctx)
{
    if (!writemask_xyz(ctx->dest_arg.writemask))
        fail(ctx, "M4X3 writemask must be .xyz");

    // !!! FIXME: MSDN stuff

    srcarg_matrix_replicate(ctx, 1, 3);
} // state_M4X3
7182 
// State handler for M3X4: requires an .xyzw destination writemask, then
//  expands source argument 1 into four consecutive matrix row registers.
static void state_M3X4(Context *ctx)
{
    if (!writemask_xyzw(ctx->dest_arg.writemask))
        fail(ctx, "M3X4 writemask must be .xyzw");

    // !!! FIXME: MSDN stuff

    srcarg_matrix_replicate(ctx, 1, 4);
} // state_M3X4
7193 
// State handler for M3X3: requires an .xyz destination writemask, then
//  expands source argument 1 into three consecutive matrix row registers.
static void state_M3X3(Context *ctx)
{
    if (!writemask_xyz(ctx->dest_arg.writemask))
        fail(ctx, "M3X3 writemask must be .xyz");

    // !!! FIXME: MSDN stuff

    srcarg_matrix_replicate(ctx, 1, 3);
} // state_M3X3
7204 
// State handler for M3X2: requires an .xy destination writemask, then
//  expands source argument 1 into two consecutive matrix row registers.
static void state_M3X2(Context *ctx)
{
    if (!writemask_xy(ctx->dest_arg.writemask))
        fail(ctx, "M3X2 writemask must be .xy");

    // !!! FIXME: MSDN stuff

    srcarg_matrix_replicate(ctx, 1, 2);
} // state_M3X2
7215 
// State check for RET: any LOOP or REP still open at this point is an error.
static void state_RET(Context *ctx)
{
    // MSDN all but says that assembly shaders are more or less serialized
    //  HLSL functions, and that a RET marks the end of one (unlike a return
    //  on most CPUs). That lets us use high-level constructs instead of a
    //  mess of GOTOs, which is a godsend for GLSL. It also means a LOOP
    //  (or REP) with no matching end inside a label's section can be
    //  treated as an error.
    const int unclosed_loops = (ctx->loops > 0);
    const int unclosed_reps = (ctx->reps > 0);

    if (unclosed_loops)
        fail(ctx, "LOOP without ENDLOOP");
    if (unclosed_reps)
        fail(ctx, "REP without ENDREP");
} // state_RET
7230 
// Validate that source argument `arg` of the current instruction names a
//  usable label register. `opcode` is only used to build error messages.
//  Every problem found is reported through fail()/failf(); nothing is
//  returned, so callers carry on even if the register was bad.
static void check_label_register(Context *ctx, int arg, const char *opcode)
{
    const SourceArgInfo *info = &ctx->source_args[arg];
    const RegisterType regtype = info->regtype;
    const int regnum = info->regnum;

    if (regtype != REG_TYPE_LABEL)
        failf(ctx, "%s with a non-label register specified", opcode);
    if (!shader_version_atleast(ctx, 2, 0))
        failf(ctx, "%s not supported in Shader Model 1", opcode);

    // The register limit depends on the shader version: 2.255 and later
    //  may use up to 2047, everything older is limited to 15. These two
    //  limits are mutually exclusive; applying the "<= 15" rule
    //  unconditionally would reject valid labels in the newer versions.
    if (shader_version_atleast(ctx, 2, 255))
    {
        if (regnum > 2047)
            fail(ctx, "label register number must be <= 2047");
    } // if
    else if (regnum > 15)
    {
        fail(ctx, "label register number must be <= 15");
    } // else if
} // check_label_register
7246 
// State check for LABEL: the instruction immediately before a LABEL must
//  be a RET, and the argument must be a valid label register, which is
//  then marked as defined.
static void state_LABEL(Context *ctx)
{
    // The test is on the *previous* opcode, so the diagnostic has to say
    //  "preceded", not "followed".
    if (ctx->previous_opcode != OPCODE_RET)
        fail(ctx, "LABEL not preceded by a RET");
    check_label_register(ctx, 0, "LABEL");
    set_defined_register(ctx, REG_TYPE_LABEL, ctx->source_args[0].regnum);
} // state_LABEL
7254 
// Enforce that every CALL/CALLNZ to a given label is made consistently
//  either inside or outside a LOOP.
//
// MSDN says subroutines inherit the aL register if you're in a loop when
//  you call, and furthermore _if you ever call this function in a loop,
//  it must always be called in a loop_. So we'll just pass our loop
//  variable as a function parameter in those cases.
//
// The label's "misc" field remembers how it was first called: 0 means not
//  called yet, 1 means from inside a LOOP, -1 means from outside one.
static void check_call_loop_wrappage(Context *ctx, const int regnum)
{
    const int current_usage = (ctx->loops > 0) ? 1 : -1;
    RegisterList *reg = reglist_find(&ctx->used_registers, REG_TYPE_LABEL, regnum);

    // The callers run this even after check_label_register() has rejected
    //  the argument, so with malformed bytecode there may be no label entry
    //  at all. Report it instead of asserting (or dereferencing NULL in
    //  builds where assert() compiles away).
    if (reg == NULL)
    {
        fail(ctx, "CALL target isn't a known label register");
        return;
    } // if

    if (reg->misc == 0)
        reg->misc = current_usage;
    else if (reg->misc != current_usage)
    {
        if (current_usage == 1)
            fail(ctx, "CALL to this label must be wrapped in LOOP/ENDLOOP");
        else
            fail(ctx, "CALL to this label must not be wrapped in LOOP/ENDLOOP");
    } // else if
} // check_call_loop_wrappage
7276 
// State check for CALL: source argument 0 must be a valid label, and this
//  call must agree with earlier calls about being inside a LOOP or not.
static void state_CALL(Context *ctx)
{
    const int label_arg = 0;
    check_label_register(ctx, label_arg, "CALL");
    check_call_loop_wrappage(ctx, ctx->source_args[label_arg].regnum);
} // state_CALL
7282 
// State check for CALLNZ: source argument 1 (the condition) must be a
//  constbool or predicate register; source argument 0 gets the same label
//  checks that CALL applies.
static void state_CALLNZ(Context *ctx)
{
    const RegisterType condtype = ctx->source_args[1].regtype;
    const int is_constbool = (condtype == REG_TYPE_CONSTBOOL);
    const int is_predicate = (condtype == REG_TYPE_PREDICATE);

    if (!is_constbool && !is_predicate)
        fail(ctx, "CALLNZ argument isn't constbool or predicate register");

    check_label_register(ctx, 0, "CALLNZ");
    check_call_loop_wrappage(ctx, ctx->source_args[0].regnum);
} // state_CALLNZ
7291 
// State check for MOVA: the destination has to be the address register.
static void state_MOVA(Context *ctx)
{
    const RegisterType dsttype = ctx->dest_arg.regtype;
    if (dsttype == REG_TYPE_ADDRESS)
        return;  // all good.

    fail(ctx, "MOVA argument isn't address register");
} // state_MOVA
7297 
// State check for RCP: the source must use a replicate swizzle.
static void state_RCP(Context *ctx)
{
    const int swiz = ctx->source_args[0].swizzle;
    if (!replicate_swizzle(swiz))
        fail(ctx, "RCP without replicate swizzzle");
} // state_RCP
7303 
// State check for LOOP: source 0 must be the loop register and source 1 a
//  constint register. The open-loop counter is only bumped when both hold.
static void state_LOOP(Context *ctx)
{
    if (ctx->source_args[0].regtype != REG_TYPE_LOOP)
    {
        fail(ctx, "LOOP argument isn't loop register");
        return;
    } // if

    if (ctx->source_args[1].regtype != REG_TYPE_CONSTINT)
    {
        fail(ctx, "LOOP argument isn't constint register");
        return;
    } // if

    ctx->loops++;
} // state_LOOP
7313 
// State check for ENDLOOP: there must be an open LOOP to close.
static void state_ENDLOOP(Context *ctx)
{
    // !!! FIXME: check that we aren't straddling an IF block.
    if (ctx->loops <= 0)
    {
        // Don't decrement here: a negative counter would make later
        //  "(ctx->loops == 0)" tests (BREAK, BREAKP, BREAKC) think they
        //  are inside a loop, and would hide a following unmatched LOOP
        //  from the check done at RET.
        fail(ctx, "ENDLOOP without LOOP");
        return;
    } // if

    ctx->loops--;
} // state_ENDLOOP
7321 
// State check for BREAKP: the argument must be a predicate register with
//  a replicate swizzle, and we must be inside a LOOP or REP. Only the
//  first violated rule is reported.
static void state_BREAKP(Context *ctx)
{
    const SourceArgInfo *pred = &ctx->source_args[0];

    if (pred->regtype != REG_TYPE_PREDICATE)
    {
        fail(ctx, "BREAKP argument isn't predicate register");
        return;
    } // if

    if (!replicate_swizzle(pred->swizzle))
    {
        fail(ctx, "BREAKP without replicate swizzzle");
        return;
    } // if

    if ((ctx->loops == 0) && (ctx->reps == 0))
        fail(ctx, "BREAKP outside LOOP/ENDLOOP or REP/ENDREP");
} // state_BREAKP
7332 
// State check for BREAK: only legal while a LOOP or REP is open.
static void state_BREAK(Context *ctx)
{
    const int in_loop = (ctx->loops != 0);
    const int in_rep = (ctx->reps != 0);
    if (!in_loop && !in_rep)
        fail(ctx, "BREAK outside LOOP/ENDLOOP or REP/ENDREP");
} // state_BREAK
7338 
// State check for SETP: the destination has to be the predicate register.
static void state_SETP(Context *ctx)
{
    if (ctx->dest_arg.regtype == REG_TYPE_PREDICATE)
        return;  // all good.

    fail(ctx, "SETP argument isn't predicate register");
} // state_SETP
7345 
// State check for REP: the argument must be a constint register. The
//  open-REP counter is bumped regardless of that check, and max_reps keeps
//  the deepest nesting level seen so far.
static void state_REP(Context *ctx)
{
    if (ctx->source_args[0].regtype != REG_TYPE_CONSTINT)
        fail(ctx, "REP argument isn't constint register");

    ctx->reps += 1;
    ctx->max_reps = (ctx->reps > ctx->max_reps) ? ctx->reps : ctx->max_reps;
} // state_REP
7356 
// State check for ENDREP: there must be an open REP to close.
static void state_ENDREP(Context *ctx)
{
    // !!! FIXME: check that we aren't straddling an IF block.
    if (ctx->reps <= 0)
    {
        // Don't decrement here: a negative counter would make later
        //  "(ctx->reps == 0)" tests (BREAK, BREAKP, BREAKC) think they
        //  are inside a REP, and would hide a following unmatched REP
        //  from the check done at RET.
        fail(ctx, "ENDREP without REP");
        return;
    } // if

    ctx->reps--;
} // state_ENDREP
7364 
// State check for CMP. Every CMP is counted; shader versions below 1.4
//  additionally get a cap of three CMPs, may not reuse the destination
//  register as one of the three sources, and are charged one extra
//  instruction slot.
static void state_CMP(Context *ctx)
{
    ctx->cmps++;

    if (shader_version_atleast(ctx, 1, 4))
        return;  // none of the extra limitations apply.

    if (ctx->cmps > 3)
        fail(ctx, "only 3 CMP instructions allowed in this shader model");

    const RegisterType dsttype = ctx->dest_arg.regtype;
    const int dstnum = ctx->dest_arg.regnum;
    int argidx;
    for (argidx = 0; argidx < 3; argidx++)
    {
        const SourceArgInfo *arg = &ctx->source_args[argidx];
        const int same_register = (arg->regtype == dsttype) &&
                                  (arg->regnum == dstnum);
        if (same_register)
            fail(ctx, "CMP dest can't match sources in this shader model");
    } // for

    ctx->instruction_count++;  // takes an extra slot in ps_1_2 and _3.
} // state_CMP
7392 
// State check for DP4: in shader versions below 1.4 the instruction is
//  charged one extra slot.
static void state_DP4(Context *ctx)
{
    if (shader_version_atleast(ctx, 1, 4))
        return;  // normal slot count.

    ctx->instruction_count++;  // takes an extra slot in ps_1_2 and _3.
} // state_DP4
7399 
// State check for CND: rejected from Shader Model 2.0 on, unrestricted in
//  1.4, and limited to r0.a as the first source in anything older.
static void state_CND(Context *ctx)
{
    // apparently it was removed...it's not in the docs past ps_1_4 ...
    if (shader_version_atleast(ctx, 2, 0))
    {
        fail(ctx, "CND not allowed in this shader model");
        return;
    } // if

    if (shader_version_atleast(ctx, 1, 4))
        return;  // no extra limitations here.

    // below 1.4: source 0 must be temp register 0 with swizzle 0xFF.
    const SourceArgInfo *cond = &ctx->source_args[0];
    const int is_r0 = (cond->regtype == REG_TYPE_TEMP) && (cond->regnum == 0);
    if (!is_r0 || (cond->swizzle != 0xFF))
        fail(ctx, "CND src must be r0.a in this shader model");
} // state_CND
7417 
// State check for POW: both sources need a replicate swizzle. Only the
//  first offender is reported.
static void state_POW(Context *ctx)
{
    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "POW src0 must have replicate swizzle");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[1].swizzle))
        fail(ctx, "POW src1 must have replicate swizzle");
} // state_POW
7425 
// State check for LOG: the source needs a replicate swizzle.
static void state_LOG(Context *ctx)
{
    const int swiz = ctx->source_args[0].swizzle;
    if (!replicate_swizzle(swiz))
        fail(ctx, "LOG src0 must have replicate swizzle");
} // state_LOG
7431 
// State check for LOGP: the source needs a replicate swizzle.
static void state_LOGP(Context *ctx)
{
    const int swiz = ctx->source_args[0].swizzle;
    if (!replicate_swizzle(swiz))
        fail(ctx, "LOGP src0 must have replicate swizzle");
} // state_LOGP
7437 
// State check for SINCOS. The rules are tested in order and only the first
//  violation is reported:
//   - the writemask must be .x, .y or .xy
//   - source 0 needs a replicate swizzle
//   - the destination can't use the saturate modifier
//   - before Shader Model 3.0, sources 1 and 2 must be two different
//     constfloat registers.
static void state_SINCOS(Context *ctx)
{
    const DestArgInfo *dst = &ctx->dest_arg;
    const int mask = dst->writemask;
    const int mask_ok = writemask_x(mask) || writemask_y(mask) ||
                        writemask_xy(mask);

    if (!mask_ok)
    {
        fail(ctx, "SINCOS write mask must be .x or .y or .xy");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "SINCOS src0 must have replicate swizzle");
        return;
    } // if

    if (dst->result_mod & MOD_SATURATE)  // according to msdn...
    {
        fail(ctx, "SINCOS destination can't use saturate modifier");
        return;
    } // if

    if (shader_version_atleast(ctx, 3, 0))
        return;  // the extra registers are only needed for <= sm2.

    int argidx;
    for (argidx = 1; argidx <= 2; argidx++)
    {
        if (ctx->source_args[argidx].regtype == REG_TYPE_CONST)
            continue;
        failf(ctx, "SINCOS src%d must be constfloat", argidx);
        return;
    } // for

    if (ctx->source_args[1].regnum == ctx->source_args[2].regnum)
        fail(ctx, "SINCOS src1 and src2 must be different registers");
} // state_SINCOS
7468 
// State check for IF: source 0 must be a predicate or constbool register,
//  and must use a replicate swizzle. Only the first problem is reported.
static void state_IF(Context *ctx)
{
    const SourceArgInfo *cond = &ctx->source_args[0];
    const int type_ok = (cond->regtype == REG_TYPE_PREDICATE) ||
                        (cond->regtype == REG_TYPE_CONSTBOOL);

    if (!type_ok)
    {
        fail(ctx, "IF src0 must be CONSTBOOL or PREDICATE");
        return;
    } // if

    if (!replicate_swizzle(cond->swizzle))
        fail(ctx, "IF src0 must have replicate swizzle");

    // !!! FIXME: track if nesting depth.
} // state_IF
7478 
// State check for IFC: both sources need a replicate swizzle. Only the
//  first offender is reported.
static void state_IFC(Context *ctx)
{
    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "IFC src0 must have replicate swizzle");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[1].swizzle))
        fail(ctx, "IFC src1 must have replicate swizzle");

    // !!! FIXME: track if nesting depth.
} // state_IFC
7487 
// State check for BREAKC: both sources need a replicate swizzle, and a
//  LOOP or REP must be open. Only the first problem is reported.
static void state_BREAKC(Context *ctx)
{
    if (!replicate_swizzle(ctx->source_args[0].swizzle))
    {
        fail(ctx, "BREAKC src1 must have replicate swizzle");
        return;
    } // if

    if (!replicate_swizzle(ctx->source_args[1].swizzle))
    {
        fail(ctx, "BREAKC src2 must have replicate swizzle");
        return;
    } // if

    if ((ctx->loops == 0) && (ctx->reps == 0))
        fail(ctx, "BREAKC outside LOOP/ENDLOOP or REP/ENDREP");
} // state_BREAKC
7497 
// State check for TEXKILL: its (destination) argument needs a full .xyzw
//  writemask and must be a temp or texture register. Only the first
//  problem is reported.
static void state_TEXKILL(Context *ctx)
{
    // The MSDN docs say this should be a source arg, but the driver docs
    //  say it's a dest arg. That's annoying.
    const DestArgInfo *arg = &ctx->dest_arg;

    if (!writemask_xyzw(arg->writemask))
    {
        fail(ctx, "TEXKILL writemask must be .xyzw");
        return;
    } // if

    const int is_temp = (arg->regtype == REG_TYPE_TEMP);
    const int is_texture = (arg->regtype == REG_TYPE_TEXTURE);
    if (!is_temp && !is_texture)
        fail(ctx, "TEXKILL must use a temp or texture register");

    // !!! FIXME: "If a temporary register is used, all components must have been previously written."
    // !!! FIXME: "If a texture register is used, all components that are read must have been declared."
    // !!! FIXME: there are further limitations in ps_1_3 and earlier.
} // state_TEXKILL
7513 
7514 // Some rules that apply to some of the fruity ps_1_1 texture opcodes...
state_texops(Context * ctx,const char * opcode,const int dims,const int texbem)7515 static void state_texops(Context *ctx, const char *opcode,
7516                          const int dims, const int texbem)
7517 {
7518     const DestArgInfo *dst = &ctx->dest_arg;
7519     const SourceArgInfo *src = &ctx->source_args[0];
7520     if (dst->regtype != REG_TYPE_TEXTURE)
7521         failf(ctx, "%s destination must be a texture register", opcode);
7522     if (src->regtype != REG_TYPE_TEXTURE)
7523         failf(ctx, "%s source must be a texture register", opcode);
7524     if (src->regnum >= dst->regnum)  // so says MSDN.
7525         failf(ctx, "%s dest must be a higher register than source", opcode);
7526 
7527     if (dims)
7528     {
7529         TextureType ttyp = (dims == 2) ? TEXTURE_TYPE_2D : TEXTURE_TYPE_CUBE;
7530         add_sampler(ctx, dst->regnum, ttyp, texbem);
7531     } // if
7532 
7533     add_attribute_register(ctx, REG_TYPE_TEXTURE, dst->regnum,
7534                            MOJOSHADER_USAGE_TEXCOORD, dst->regnum, 0xF, 0);
7535 
7536     // Strictly speaking, there should be a TEX opcode prior to this call that
7537     //  should fill in this metadata, but I'm not sure that's required for the
7538     //  shader to assemble in D3D, so we'll do this so we don't fail with a
7539     //  cryptic error message even if the developer didn't do the TEX.
7540     add_attribute_register(ctx, REG_TYPE_TEXTURE, src->regnum,
7541                            MOJOSHADER_USAGE_TEXCOORD, src->regnum, 0xF, 0);
7542 } // state_texops
7543 
// Shared state check for TEXBEM and TEXBEML (`opcode` names which one, for
//  error messages). Not available from Shader Model 1.4 on; before 1.2 the
//  source may not carry the _bx2 modifier. The common texture-op checks
//  then run with a 2D sampler flagged for texbem.
static void state_texbem(Context *ctx, const char *opcode)
{
    // The TEXBEM equation, according to MSDN:
    //u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R
    //         + D3DTSS_BUMPENVMAT10(stage m)*t(n)G
    //v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R
    //         + D3DTSS_BUMPENVMAT11(stage m)*t(n)G
    //t(m)RGBA = TextureSample(stage m)
    //
    // ...TEXBEML adds this at the end:
    //t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) +
    //           D3DTSS_BUMPENVLOFFSET(stage m)]

    if (shader_version_atleast(ctx, 1, 4))
        failf(ctx, "%s opcode not available after Shader Model 1.3", opcode);

    const int before_ps_1_2 = !shader_version_atleast(ctx, 1, 2);
    if (before_ps_1_2 && (ctx->source_args[0].src_mod == SRCMOD_SIGN))
        failf(ctx, "%s forbids _bx2 on source reg before ps_1_2", opcode);

    // !!! FIXME: MSDN:
    // !!! FIXME: Register data that has been read by a texbem
    // !!! FIXME:  or texbeml instruction cannot be read later,
    // !!! FIXME:  except by another texbem or texbeml.

    state_texops(ctx, opcode, 2, 1);
} // state_texbem
7573 
// State check for TEXBEM; all the work happens in state_texbem().
static void state_TEXBEM(Context *ctx)
{
    static const char opname[] = "TEXBEM";
    state_texbem(ctx, opname);
} // state_TEXBEM
7578 
// State check for TEXBEML; all the work happens in state_texbem().
static void state_TEXBEML(Context *ctx)
{
    static const char opname[] = "TEXBEML";
    state_texbem(ctx, opname);
} // state_TEXBEML
7583 
// State check for TEXM3X2PAD: not available from Shader Model 1.4 on.
//  After the common texture-op checks (no sampler), the source and
//  destination register numbers are remembered in the context; the
//  TEXM3X2TEX check later looks at texm3x2pad_dst0.
static void state_TEXM3X2PAD(Context *ctx)
{
    if (shader_version_atleast(ctx, 1, 4))
        fail(ctx, "TEXM3X2PAD opcode not available after Shader Model 1.3");

    state_texops(ctx, "TEXM3X2PAD", 0, 0);

    // !!! FIXME: check for correct opcode existance and order more rigorously?
    const int srcnum = ctx->source_args[0].regnum;
    const int dstnum = ctx->dest_arg.regnum;
    ctx->texm3x2pad_src0 = srcnum;
    ctx->texm3x2pad_dst0 = dstnum;
} // state_TEXM3X2PAD
7593 
// State check for TEXM3X2TEX: not available from Shader Model 1.4 on, and
//  a TEXM3X2PAD must have been recorded first. Runs the common texture-op
//  checks with a 2D sampler, flags the pad bookkeeping for reset, and then
//  verifies that the sampler registered for the destination is really 2D.
static void state_TEXM3X2TEX(Context *ctx)
{
    if (shader_version_atleast(ctx, 1, 4))
        fail(ctx, "TEXM3X2TEX opcode not available after Shader Model 1.3");
    if (ctx->texm3x2pad_dst0 == -1)
        fail(ctx, "TEXM3X2TEX opcode without matching TEXM3X2PAD");

    // !!! FIXME: check for correct opcode existance and order more rigorously?
    state_texops(ctx, "TEXM3X2TEX", 2, 0);
    ctx->reset_texmpad = 1;

    // A missing sampler entry is treated as texture type 0.
    const RegisterList *sampler = reglist_find(&ctx->samplers,
                                               REG_TYPE_SAMPLER,
                                               ctx->dest_arg.regnum);
    TextureType ttype = (TextureType) 0;
    if (sampler != NULL)
        ttype = (TextureType) sampler->index;

    // A samplermap might change this to something nonsensical.
    if (ttype != TEXTURE_TYPE_2D)
        fail(ctx, "TEXM3X2TEX needs a 2D sampler");
} // state_TEXM3X2TEX
7612 
// State check for TEXM3X3PAD: not available from Shader Model 1.4 on.
//  After the common texture-op checks (no sampler), the source and
//  destination register numbers go into the first free pad slot: slot 0
//  if texm3x3pad_dst0 is still -1, else slot 1 if texm3x3pad_dst1 is
//  still -1. If both slots are taken nothing is recorded.
static void state_TEXM3X3PAD(Context *ctx)
{
    // This diagnostic used to name TEXM3X2TEX (copy/paste slip); it has to
    //  name the opcode that is actually being rejected.
    if (shader_version_atleast(ctx, 1, 4))
        fail(ctx, "TEXM3X3PAD opcode not available after Shader Model 1.3");
    state_texops(ctx, "TEXM3X3PAD", 0, 0);

    // !!! FIXME: check for correct opcode existance and order more rigorously?
    if (ctx->texm3x3pad_dst0 == -1)
    {
        ctx->texm3x3pad_src0 = ctx->source_args[0].regnum;
        ctx->texm3x3pad_dst0 = ctx->dest_arg.regnum;
    } // if
    else if (ctx->texm3x3pad_dst1 == -1)
    {
        ctx->texm3x3pad_src1 = ctx->source_args[0].regnum;
        ctx->texm3x3pad_dst1 = ctx->dest_arg.regnum;
    } // else
} // state_TEXM3X3PAD
7631 
// Shared state check for the TEXM3X3* opcodes (`opcode` names which one,
//  for error messages). Not available from Shader Model 1.4 on, and two
//  TEXM3X3PADs must have been recorded first. `dims` is passed through to
//  state_texops(): non-zero means the opcode samples a texture and a
//  sampler gets registered for the destination; zero means it doesn't.
static void state_texm3x3(Context *ctx, const char *opcode, const int dims)
{
    // !!! FIXME: check for correct opcode existance and order more rigorously?
    if (shader_version_atleast(ctx, 1, 4))
        failf(ctx, "%s opcode not available after Shader Model 1.3", opcode);
    if (ctx->texm3x3pad_dst1 == -1)
        failf(ctx, "%s opcode without matching TEXM3X3PADs", opcode);
    state_texops(ctx, opcode, dims, 0);
    ctx->reset_texmpad = 1;

    // With dims == 0 state_texops() registers no sampler, so demanding a
    //  3D/cube sampler here would reject every such instruction (plain
    //  TEXM3X3). Only opcodes that actually sample get this check.
    if (dims == 0)
        return;

    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
                                      ctx->dest_arg.regnum);
    const TextureType ttype = (TextureType) (sreg ? sreg->index : 0);

    // A samplermap might change this to something nonsensical.
    if ((ttype != TEXTURE_TYPE_VOLUME) && (ttype != TEXTURE_TYPE_CUBE))
        failf(ctx, "%s needs a 3D or Cubemap sampler", opcode);
} // state_texm3x3
7650 
// State check for TEXM3X3: needs at least shader version 1.2; the rest is
//  handled by state_texm3x3() with dims of 0.
static void state_TEXM3X3(Context *ctx)
{
    const int supported = shader_version_atleast(ctx, 1, 2);
    if (!supported)
        fail(ctx, "TEXM3X3 opcode not available in Shader Model 1.1");

    state_texm3x3(ctx, "TEXM3X3", 0);
} // state_TEXM3X3
7657 
// State check for TEXM3X3TEX; handled by state_texm3x3() with dims of 3.
static void state_TEXM3X3TEX(Context *ctx)
{
    const int dims = 3;
    state_texm3x3(ctx, "TEXM3X3TEX", dims);
} // state_TEXM3X3TEX
7662 
// State check for TEXM3X3SPEC: the shared state_texm3x3() checks with
//  dims of 3, plus the requirement that source 1 is a constant register.
static void state_TEXM3X3SPEC(Context *ctx)
{
    state_texm3x3(ctx, "TEXM3X3SPEC", 3);

    const RegisterType lasttype = ctx->source_args[1].regtype;
    if (lasttype != REG_TYPE_CONST)
        fail(ctx, "TEXM3X3SPEC final arg must be a constant register");
} // state_TEXM3X3SPEC
7669 
// State check for TEXM3X3VSPEC; handled by state_texm3x3() with dims of 3.
static void state_TEXM3X3VSPEC(Context *ctx)
{
    const int dims = 3;
    state_texm3x3(ctx, "TEXM3X3VSPEC", dims);
} // state_TEXM3X3VSPEC
7674 
7675 
// State check for TEXLD (called TEX in the oldest pixel shaders).
//
// Shader Model 2.0 and later: source 0 must be unmodified, source 1 must be
//  an unmodified sampler register, and the control bits must be one of
//  TEXLD/TEXLDP/TEXLDB (only the first of those problems is reported).
//  Before Shader Model 3.0 neither source may swizzle. Sampling a cube map
//  is charged three extra instruction slots.
//
// Version 1.4: nothing is checked yet.
//
// Older than 1.4: the destination must be a texture register; a 2D sampler
//  and a TEXCOORD attribute are registered for its register number.
static void state_TEXLD(Context *ctx)
{
    if (shader_version_atleast(ctx, 2, 0))
    {
        const SourceArgInfo *src0 = &ctx->source_args[0];
        const SourceArgInfo *src1 = &ctx->source_args[1];

        // !!! FIXME: verify texldp restrictions:
        //http://msdn.microsoft.com/en-us/library/bb206221(VS.85).aspx
        // !!! FIXME: ...and texldb, too.
        //http://msdn.microsoft.com/en-us/library/bb206217(VS.85).aspx

        // !!! FIXME: msdn says the dest has to be a temp register, but
        // !!! FIXME:  Microsoft's HLSL compiler is generating code that
        // !!! FIXME:  uses oC0 for a dest, so that isn't enforced.

        // !!! FIXME: src0 isn't restricted to texture/temp registers
        // !!! FIXME:  either: it can be a REG_TYPE_INPUT, DCL'd to TEXCOORD.

        if (src0->src_mod != SRCMOD_NONE)
            fail(ctx, "TEXLD src0 must have no modifiers");
        else if (src1->regtype != REG_TYPE_SAMPLER)
            fail(ctx, "TEXLD src1 must be sampler register");
        else if (src1->src_mod != SRCMOD_NONE)
            fail(ctx, "TEXLD src1 must have no modifiers");
        else if ( (ctx->instruction_controls != CONTROL_TEXLD) &&
                  (ctx->instruction_controls != CONTROL_TEXLDP) &&
                  (ctx->instruction_controls != CONTROL_TEXLDB) )
        {
            fail(ctx, "TEXLD has unknown control bits");
        } // else if

        // Shader Model 3 added swizzle support to this opcode.
        if (!shader_version_atleast(ctx, 3, 0))
        {
            if (!no_swizzle(src0->swizzle))
                fail(ctx, "TEXLD src0 must not swizzle");
            else if (!no_swizzle(src1->swizzle))
                fail(ctx, "TEXLD src1 must not swizzle");
        } // if

        // The texture type belongs to the sampler's entry in ctx->samplers
        //  (its "index" field), not to the register number: s3 is not
        //  automatically a cube map, and a cube map can live in any slot.
        //  A sampler with no entry is simply not treated as a cube map.
        const RegisterList *sreg = reglist_find(&ctx->samplers,
                                                REG_TYPE_SAMPLER,
                                                src1->regnum);
        if ((sreg != NULL) && (((TextureType) sreg->index) == TEXTURE_TYPE_CUBE))
            ctx->instruction_count += 3;
    } // if

    else if (shader_version_atleast(ctx, 1, 4))
    {
        // !!! FIXME: checks for ps_1_4 version here...
    } // else if

    else
    {
        // !!! FIXME: add (other?) checks for ps_1_1 version here...
        const DestArgInfo *info = &ctx->dest_arg;
        const int sampler = info->regnum;
        if (info->regtype != REG_TYPE_TEXTURE)
            fail(ctx, "TEX param must be a texture register");
        add_sampler(ctx, sampler, TEXTURE_TYPE_2D, 0);
        add_attribute_register(ctx, REG_TYPE_TEXTURE, sampler,
                               MOJOSHADER_USAGE_TEXCOORD, sampler, 0xF, 0);
    } // else
} // state_TEXLD
7743 
// State check for TEXLDL: requires Shader Model 3.0 and a sampler register
//  as source 1. Sampling a cube map is charged three extra instruction
//  slots.
static void state_TEXLDL(Context *ctx)
{
    const SourceArgInfo *src1 = &ctx->source_args[1];

    if (!shader_version_atleast(ctx, 3, 0))
        fail(ctx, "TEXLDL in version < Shader Model 3.0");
    else if (src1->regtype != REG_TYPE_SAMPLER)
        fail(ctx, "TEXLDL src1 must be sampler register");
    else
    {
        // The texture type belongs to the sampler's entry in ctx->samplers
        //  (its "index" field), not to the register number: s3 is not
        //  automatically a cube map, and a cube map can live in any slot.
        //  A sampler with no entry is simply not treated as a cube map.
        const RegisterList *sreg = reglist_find(&ctx->samplers,
                                                REG_TYPE_SAMPLER,
                                                src1->regnum);
        if ((sreg != NULL) && (((TextureType) sreg->index) == TEXTURE_TYPE_CUBE))
            ctx->instruction_count += 3;
    } // else
} // state_TEXLDL
7756 
// State check for DP2ADD: source 2 needs a replicate swizzle.
static void state_DP2ADD(Context *ctx)
{
    const int swiz = ctx->source_args[2].swizzle;
    if (!replicate_swizzle(swiz))
        fail(ctx, "DP2ADD src2 must have replicate swizzle");
} // state_DP2ADD
7762 
7763 
// Lookup table for instruction opcodes...
// One entry of that table: everything parse_instruction_token() needs to
//  know about a single opcode.
typedef struct
{
    const char *opcode_string;  // NULL marks an opcode we don't know.
    int slots;  // number of instruction slots this opcode eats.
    MOJOSHADER_shaderType shader_types;  // mask of types that can use opcode.
    args_function parse_args;  // parses the argument tokens; returns token count.
    state_function state;  // optional per-opcode validation; may be NULL.
    emit_function emitter[STATICARRAYLEN(profiles)];  // one per profile, indexed by ctx->profileid.
} Instruction;
7774 
// These have to be in the right order! This array is indexed by the value
//  of the instruction token.
//
// The rows themselves are generated: mojoshader_internal.h is included
//  again with MOJOSHADER_DO_INSTRUCTION_TABLE defined, and is expected to
//  expand to a list of INSTRUCTION()/INSTRUCTION_STATE() invocations.
static const Instruction instructions[] =
{
    // An opcode that has a state_<op>() validation function.
    #define INSTRUCTION_STATE(op, opstr, slots, a, t) { \
        opstr, slots, t, parse_args_##a, state_##op, PROFILE_EMITTERS(op) \
    },

    // An opcode with no validation function (state is left as 0/NULL).
    #define INSTRUCTION(op, opstr, slots, a, t) { \
        opstr, slots, t, parse_args_##a, 0, PROFILE_EMITTERS(op) \
    },

    #define MOJOSHADER_DO_INSTRUCTION_TABLE 1
    #include "mojoshader_internal.h"
    #undef MOJOSHADER_DO_INSTRUCTION_TABLE

    #undef INSTRUCTION
    #undef INSTRUCTION_STATE
};
7794 
7795 
7796 // parse various token types...
7797 
// Parse, validate and emit one instruction, starting at ctx->tokens.
//
// Returns the number of tokens the instruction occupies, or 0 if the token
//  isn't an instruction this table knows about. The token stream position
//  in `ctx` is left where it was on entry; the caller is expected to
//  advance by the returned count. Problems are reported through
//  fail()/failf(), and the profile's emitter only runs if nothing has
//  failed.
static int parse_instruction_token(Context *ctx)
{
    // Remember where we started: argument parsing moves the stream forward.
    const uint32 *saved_tokens = ctx->tokens;
    const uint32 saved_tokencount = ctx->tokencount;
    const int saved_position = ctx->current_position;

    // Pick the instruction token apart.
    const uint32 token = SWAP32(*(ctx->tokens));
    const uint32 opcode = (token & 0xFFFF);
    const uint32 controls = ((token >> 16) & 0xFF);
    const uint32 insttoks = ((token >> 24) & 0x0F);
    const int coissue = (int) ((token >> 30) & 0x1);
    const int predicated = (int) ((token >> 28) & 0x1);
    const size_t table_len = sizeof (instructions) / sizeof (instructions[0]);
    int consumed = 0;

    if (opcode >= table_len)
        return 0;  // not an instruction token, or just not handled here.

    const Instruction *instruction = &instructions[opcode];
    const emit_function emitter = instruction->emitter[ctx->profileid];

    if ((token & 0x80000000) != 0)
        fail(ctx, "instruction token high bit must be zero.");  // so says msdn.

    if (instruction->opcode_string == NULL)
    {
        fail(ctx, "Unknown opcode.");
        return insttoks + 1;  // pray that you resync later.
    } // if

    ctx->coissue = coissue;
    if (coissue)
    {
        if (!shader_is_pixel(ctx))
            fail(ctx, "coissue instruction on non-pixel shader");
        if (shader_version_atleast(ctx, 2, 0))
            fail(ctx, "coissue instruction in Shader Model >= 2.0");
    } // if

    if ((ctx->shader_type & instruction->shader_types) == 0)
    {
        failf(ctx, "opcode '%s' not available in this shader type.",
                instruction->opcode_string);
    } // if

    memset(ctx->dwords, '\0', sizeof (ctx->dwords));
    ctx->instruction_controls = controls;
    ctx->predicated = predicated;

    // Step over the instruction token itself, then let the opcode's
    //  argument parser fill in the context.
    adjust_token_position(ctx, 1);
    consumed = instruction->parse_args(ctx);

    if (predicated)
        consumed += parse_predicated_token(ctx);

    // parse_args() moves these forward for convenience...put them back.
    ctx->tokens = saved_tokens;
    ctx->tokencount = saved_tokencount;
    ctx->current_position = saved_position;

    if (instruction->state != NULL)
        instruction->state(ctx);

    ctx->instruction_count += instruction->slots;

    if (!isfail(ctx))
        emitter(ctx);  // call the profile's emitter.

    if (ctx->reset_texmpad)
    {
        // Forget all TEXM3X2PAD/TEXM3X3PAD bookkeeping.
        ctx->texm3x2pad_dst0 = -1;
        ctx->texm3x2pad_src0 = -1;
        ctx->texm3x3pad_dst0 = -1;
        ctx->texm3x3pad_src0 = -1;
        ctx->texm3x3pad_dst1 = -1;
        ctx->texm3x3pad_src1 = -1;
        ctx->reset_texmpad = 0;
    } // if

    ctx->previous_opcode = opcode;
    ctx->scratch_registers = 0;  // reset after every instruction.

    if (shader_version_atleast(ctx, 2, 0))
    {
        // From 2.0 on, the token carries its own length; it has to agree
        //  with what we actually parsed.
        const uint32 expected = insttoks + 1;
        if (((uint32) consumed) != expected)
        {
            failf(ctx, "wrong token count (%u, not %u) for opcode '%s'.",
                    (uint) consumed, (uint) (insttoks+1),
                    instruction->opcode_string);
            consumed = insttoks + 1;  // try to keep sync.
        } // if
    } // if
    else if (insttoks != 0)  // reserved field in shaders < 2.0 ...
    {
        fail(ctx, "instruction token count must be zero");
    } // else if

    return consumed;
} // parse_instruction_token
7897 
7898 
// Parse the version token at the head of the stream and record the shader
//  type and version in `ctx`. If nothing has failed, the profile's
//  start_emitter is called with `profilestr`.
//
// Returns 1 when a token was consumed, 0 when the stream is empty, and -1
//  when the token doesn't describe a pixel or vertex shader.
static int parse_version_token(Context *ctx, const char *profilestr)
{
    if (ctx->tokencount == 0)
    {
        fail(ctx, "Expected version token, got none at all.");
        return 0;
    } // if

    const uint32 token = SWAP32(*(ctx->tokens));
    const uint32 shadertype = ((token >> 16) & 0xFFFF);
    const uint8 major = (uint8) ((token >> 8) & 0xFF);
    const uint8 minor = (uint8) (token & 0xFF);

    ctx->version_token = token;

    switch (shadertype)
    {
        case 0xFFFF:  // pixel shader.
            ctx->shader_type = MOJOSHADER_TYPE_PIXEL;
            ctx->shader_type_str = "ps";
            break;

        case 0xFFFE:  // vertex shader.
            ctx->shader_type = MOJOSHADER_TYPE_VERTEX;
            ctx->shader_type_str = "vs";
            break;

        default:  // geometry shader? Bogus data?
            fail(ctx, "Unsupported shader type or not a shader at all");
            return -1;
    } // switch

    ctx->major_ver = major;
    ctx->minor_ver = minor;

    if (!shader_version_supported(major, minor))
    {
        failf(ctx, "Shader Model %u.%u is currently unsupported.",
                (uint) major, (uint) minor);
    } // if

    if (!isfail(ctx))
        ctx->profile->start_emitter(ctx, profilestr);

    return 1;  // ate one token.
} // parse_version_token
7945 
7946 
// Check that a string inside a CTAB buffer is safe to read.
//
// `start` points at the buffer, `bytes` is the buffer's size, and `name`
//  is the offset of the string's first character. Returns 1 if a null
//  terminator is found before the end of the buffer, 0 otherwise (which
//  includes an offset at or past the end).
static int parse_ctab_string(const uint8 *start, const uint32 bytes,
                             const uint32 name)
{
    // Make sure strings don't overflow the CTAB buffer...
    if (name >= bytes)
        return 0;  // starts out of bounds.

    const int slenmax = bytes - name;
    const char *ptr = (const char *) (start + name);
    int remaining;
    for (remaining = slenmax; remaining > 0; remaining--, ptr++)
    {
        if (*ptr == '\0')
            return 1;  // it's okay.
    } // for

    return 0;  // overflowed.
} // parse_ctab_string
7965 
7966 
// Deepest struct nesting we will follow while reading CTAB type info.
//  Member offsets come straight from the bytecode, so without a limit a
//  member that points back at one of its parents would recurse forever.
#define CTAB_TYPEINFO_MAX_DEPTH 64

// Worker for parse_ctab_typeinfo(). `depth` is how many struct levels we are
//  below the symbol's own type info (zero for the symbol itself).
static int parse_ctab_typeinfo_depth(Context *ctx, const uint8 *start,
                                     const uint32 bytes, const uint32 pos,
                                     MOJOSHADER_symbolTypeInfo *info,
                                     const int depth)
{
    if (depth > CTAB_TYPEINFO_MAX_DEPTH)
        return 0;  // corrupt CTAB: members nest (or loop) without end.

    // Same test as ((pos + 16) >= bytes), but written with a subtraction so
    //  a huge `pos` can't wrap around and slip past the check.
    if ((pos >= bytes) || ((bytes - pos) <= 16))
        return 0;  // corrupt CTAB.

    const uint16 *typeptr = (const uint16 *) (start + pos);
    const uint32 member_count = (uint32) SWAP16(typeptr[5]);

    info->parameter_class = (MOJOSHADER_symbolClass) SWAP16(typeptr[0]);
    info->parameter_type = (MOJOSHADER_symbolType) SWAP16(typeptr[1]);
    info->rows = (unsigned int) SWAP16(typeptr[2]);
    info->columns = (unsigned int) SWAP16(typeptr[3]);
    info->elements = (unsigned int) SWAP16(typeptr[4]);

    // Cleanup code walks `member_count` entries of `members`, so never
    //  publish a count until the array really exists.
    info->member_count = 0;
    info->members = NULL;

    // Same as ((pos + 16 + (member_count * 8)) >= bytes), minus the wrap.
    if ((member_count * 8) >= (bytes - pos - 16))
        return 0;  // corrupt CTAB.

    if (member_count == 0)
        return 1;  // no members to read.

    const size_t len = sizeof (MOJOSHADER_symbolStructMember) * member_count;
    info->members = (MOJOSHADER_symbolStructMember *) Malloc(ctx, len);
    if (info->members == NULL)
        return 1;  // we'll check ctx->out_of_memory later.
    memset(info->members, '\0', len);
    info->member_count = (unsigned int) member_count;

    // Each member is a pair of uint32s: name offset, then type info offset.
    const uint32 *member = (const uint32 *)((const uint8 *) (&typeptr[6]));
    uint32 i;
    for (i = 0; i < member_count; i++)
    {
        MOJOSHADER_symbolStructMember *mbr = &info->members[i];
        const uint32 name = SWAP32(member[0]);
        const uint32 memberinfopos = SWAP32(member[1]);
        member += 2;

        if (!parse_ctab_string(start, bytes, name))
            return 0;  // info->members will be free()'d elsewhere.

        mbr->name = StrDup(ctx, (const char *) (start + name));
        if (mbr->name == NULL)
            return 1;  // we'll check ctx->out_of_memory later.
        if (!parse_ctab_typeinfo_depth(ctx, start, bytes, memberinfopos,
                                       &mbr->info, depth + 1))
            return 0;
        if (ctx->out_of_memory)
            return 1;  // drop out now.
    } // for

    return 1;
} // parse_ctab_typeinfo_depth


// Read the type info record at offset `pos` of the CTAB buffer
//  (`start`, `bytes` long) into `info`, recursing into struct members.
// Returns 0 if the CTAB is corrupt. Returns 1 otherwise, which includes
//  running out of memory: the caller is expected to check
//  ctx->out_of_memory. Whatever was allocated into `info` before a failure
//  is left there, with member_count matching the allocated array, so the
//  usual cleanup can release it.
static int parse_ctab_typeinfo(Context *ctx, const uint8 *start,
                               const uint32 bytes, const uint32 pos,
                               MOJOSHADER_symbolTypeInfo *info)
{
    return parse_ctab_typeinfo_depth(ctx, start, bytes, pos, info, 0);
} // parse_ctab_typeinfo
8021 
8022 
8023 // Microsoft's tools add a CTAB comment to all shaders. This is the
8024 //  "constant table," or specifically: D3DXSHADER_CONSTANTTABLE:
8025 //  http://msdn.microsoft.com/en-us/library/bb205440(VS.85).aspx
8026 // This may tell us high-level truths about an otherwise generic low-level
8027 //  registers, for instance, how large an array actually is, etc.
parse_constant_table(Context * ctx,const uint32 * tokens,const uint32 bytes,const uint32 okay_version,const int setvariables,CtabData * ctab)8028 static void parse_constant_table(Context *ctx, const uint32 *tokens,
8029                                  const uint32 bytes, const uint32 okay_version,
8030                                  const int setvariables, CtabData *ctab)
8031 {
8032     const uint32 id = SWAP32(tokens[1]);
8033     if (id != CTAB_ID)
8034         return;  // not the constant table.
8035 
8036     assert(ctab->have_ctab == 0);  // !!! FIXME: can you have more than one?
8037     ctab->have_ctab = 1;
8038 
8039     const uint8 *start = (uint8 *) &tokens[2];
8040 
8041     if (bytes < 32)
8042     {
8043         fail(ctx, "Truncated CTAB data");
8044         return;
8045     } // if
8046 
8047     const uint32 size = SWAP32(tokens[2]);
8048     const uint32 creator = SWAP32(tokens[3]);
8049     const uint32 version = SWAP32(tokens[4]);
8050     const uint32 constants = SWAP32(tokens[5]);
8051     const uint32 constantinfo = SWAP32(tokens[6]);
8052     const uint32 target = SWAP32(tokens[8]);
8053 
8054     if (size != CTAB_SIZE)
8055         goto corrupt_ctab;
8056 
8057     if (version != okay_version) goto corrupt_ctab;
8058     if (creator >= bytes) goto corrupt_ctab;
8059     if ((constantinfo + (constants * CINFO_SIZE)) >= bytes) goto corrupt_ctab;
8060     if (target >= bytes) goto corrupt_ctab;
8061     if (!parse_ctab_string(start, bytes, target)) goto corrupt_ctab;
8062     // !!! FIXME: check that (start+target) points to "ps_3_0", etc.
8063 
8064     ctab->symbol_count = constants;
8065     ctab->symbols = (MOJOSHADER_symbol *)Malloc(ctx, sizeof (MOJOSHADER_symbol) * constants);
8066     if (ctab->symbols == NULL)
8067         return;
8068     memset(ctab->symbols, '\0', sizeof (MOJOSHADER_symbol) * constants);
8069 
8070     uint32 i = 0;
8071     for (i = 0; i < constants; i++)
8072     {
8073         const uint8 *ptr = start + constantinfo + (i * CINFO_SIZE);
8074         const uint32 name = SWAP32(*((uint32 *) (ptr + 0)));
8075         const uint16 regset = SWAP16(*((uint16 *) (ptr + 4)));
8076         const uint16 regidx = SWAP16(*((uint16 *) (ptr + 6)));
8077         const uint16 regcnt = SWAP16(*((uint16 *) (ptr + 8)));
8078         const uint32 typeinf = SWAP32(*((uint32 *) (ptr + 12)));
8079         const uint32 defval = SWAP32(*((uint32 *) (ptr + 16)));
8080         MOJOSHADER_uniformType mojotype = MOJOSHADER_UNIFORM_UNKNOWN;
8081 
8082         if (!parse_ctab_string(start, bytes, name)) goto corrupt_ctab;
8083         if (defval >= bytes) goto corrupt_ctab;
8084 
8085         switch (regset)
8086         {
8087             case 0: mojotype = MOJOSHADER_UNIFORM_BOOL; break;
8088             case 1: mojotype = MOJOSHADER_UNIFORM_INT; break;
8089             case 2: mojotype = MOJOSHADER_UNIFORM_FLOAT; break;
8090             case 3: /* SAMPLER */ break;
8091             default: goto corrupt_ctab;
8092         } // switch
8093 
8094         if ((setvariables) && (mojotype != MOJOSHADER_UNIFORM_UNKNOWN))
8095         {
8096             VariableList *item;
8097             item = (VariableList *) Malloc(ctx, sizeof (VariableList));
8098             if (item != NULL)
8099             {
8100                 item->type = mojotype;
8101                 item->index = regidx;
8102                 item->count = regcnt;
8103                 item->constant = NULL;
8104                 item->used = 0;
8105                 item->emit_position = -1;
8106                 item->next = ctx->variables;
8107                 ctx->variables = item;
8108             } // if
8109         } // if
8110 
8111         // Add the symbol.
8112         const char *namecpy = StrDup(ctx, (const char *) (start + name));
8113         if (namecpy == NULL)
8114             return;
8115 
8116         MOJOSHADER_symbol *sym = &ctab->symbols[i];
8117         sym->name = namecpy;
8118         sym->register_set = (MOJOSHADER_symbolRegisterSet) regset;
8119         sym->register_index = (unsigned int) regidx;
8120         sym->register_count = (unsigned int) regcnt;
8121         if (!parse_ctab_typeinfo(ctx, start, bytes, typeinf, &sym->info))
8122             goto corrupt_ctab;  // sym->name will get free()'d later.
8123         else if (ctx->out_of_memory)
8124             return;  // just bail now.
8125     } // for
8126 
8127     return;
8128 
8129 corrupt_ctab:
8130     fail(ctx, "Shader has corrupt CTAB data");
8131 } // parse_constant_table
8132 
8133 
// Forward declaration; the definition appears further down in this file.
//  Releases `symcount` symbols in `syms` (and the array itself) through the
//  free callback `f`, which is handed `d` as its second argument.
static void free_symbols(MOJOSHADER_free f, void *d, MOJOSHADER_symbol *syms,
                         const int symcount);
8136 
8137 
// Decide whether `tok` (still in bytecode byte order) is a comment token.
// If it is, stores the number of data tokens that follow it in *tokcount
//  and returns 1. Otherwise returns 0 and leaves *tokcount alone.
// A comment token with its top bit set is reported through fail(), but is
//  still treated as a comment.
static int is_comment_token(Context *ctx, const uint32 tok, uint32 *tokcount)
{
    const uint32 value = SWAP32(tok);
    const uint32 opcode = (value & 0xFFFF);

    if (opcode != 0xFFFE)
        return 0;  // not a comment token at all.

    if (value & 0x80000000)  // msdn says this bit is always clear.
        fail(ctx, "comment token high bit must be zero.");

    *tokcount = ((value >> 16) & 0xFFFF);
    return 1;
} // is_comment_token
8151 
8152 
// Where one section of a preshader (CTAB, PRSI, FXLC or CLIT) was found
//  inside the enclosing preshader comment.
typedef struct PreshaderBlockInfo
{
    const uint32 *tokens;  // first data token of the section (its ID token).
    uint32 tokcount;  // data tokens in the section, the ID token included.
    int seen;  // non-zero once this section has been located.
} PreshaderBlockInfo;
8159 
8160 // Preshaders only show up in compiled Effect files. The format is
8161 //  undocumented, and even the instructions aren't the same opcodes as you
8162 //  would find in a regular shader. These things show up because the HLSL
8163 //  compiler can detect work that sets up constant registers that could
8164 //  be moved out of the shader itself. Preshaders run once, then the shader
8165 //  itself runs many times, using the constant registers the preshader has set
8166 //  up. There are cases where the preshaders are 3+ times as many instructions
8167 //  as the shader itself, so this can be a big performance win.
8168 // My presumption is that Microsoft's Effects framework runs the preshaders on
8169 //  the CPU, then loads the constant register file appropriately before handing
8170 //  off to the GPU. As such, we do the same.
parse_preshader(Context * ctx,uint32 tokcount)8171 static void parse_preshader(Context *ctx, uint32 tokcount)
8172 {
8173     const uint32 *tokens = ctx->tokens;
8174     if ((tokcount < 2) || (SWAP32(tokens[1]) != PRES_ID))
8175         return;  // not a preshader.
8176 
8177 #if !SUPPORT_PRESHADERS
8178     fail(ctx, "Preshader found, but preshader support is disabled!");
8179 #else
8180 
8181     assert(ctx->have_preshader == 0);  // !!! FIXME: can you have more than one?
8182     ctx->have_preshader = 1;
8183 
8184     // !!! FIXME: I don't know what specific versions signify, but we need to
8185     // !!! FIXME:  save this to test against the CTAB version field, if
8186     // !!! FIXME:  nothing else.
8187     // !!! FIXME: 0x02 0x01 is probably the version (fx_2_1),
8188     // !!! FIXME:  and 0x4658 is the magic, like a real shader's version token.
8189     const uint32 okay_version = 0x46580201;
8190     if (SWAP32(tokens[2]) != okay_version)
8191     {
8192         fail(ctx, "Unsupported preshader version.");
8193         return;  // fail because the shader will malfunction w/o this.
8194     } // if
8195 
8196     tokens += 3;
8197     tokcount -= 3;
8198 
8199     // All sections of a preshader are packed into separate comment tokens,
8200     //  inside the containing comment token block. Find them all before
8201     //  we start, so we don't care about the order they appear in the file.
8202     PreshaderBlockInfo ctab = { 0, 0, 0 };
8203     PreshaderBlockInfo prsi = { 0, 0, 0 };
8204     PreshaderBlockInfo fxlc = { 0, 0, 0 };
8205     PreshaderBlockInfo clit = { 0, 0, 0 };
8206 
8207     while (tokcount > 0)
8208     {
8209         uint32 subtokcount = 0;
8210         if ( (!is_comment_token(ctx, *tokens, &subtokcount)) ||
8211              (subtokcount > tokcount) )
8212         {
8213             fail(ctx, "Bogus preshader data.");
8214             return;
8215         } // if
8216 
8217         tokens++;
8218         tokcount--;
8219 
8220         const uint32 *nexttokens = tokens + subtokcount;
8221         const uint32 nexttokcount = tokcount - subtokcount;
8222 
8223         if (subtokcount > 0)
8224         {
8225             switch (SWAP32(*tokens))
8226             {
8227                 #define PRESHADER_BLOCK_CASE(id, var) \
8228                     case id##_ID: { \
8229                         if (var.seen) { \
8230                             fail(ctx, "Multiple " #id " preshader blocks."); \
8231                             return; \
8232                         } \
8233                         var.tokens = tokens; \
8234                         var.tokcount = subtokcount; \
8235                         var.seen = 1; \
8236                         break; \
8237                     }
8238                 PRESHADER_BLOCK_CASE(CTAB, ctab);
8239                 PRESHADER_BLOCK_CASE(PRSI, prsi);
8240                 PRESHADER_BLOCK_CASE(FXLC, fxlc);
8241                 PRESHADER_BLOCK_CASE(CLIT, clit);
8242                 default: fail(ctx, "Bogus preshader section."); return;
8243                 #undef PRESHADER_BLOCK_CASE
8244             } // switch
8245         } // if
8246 
8247         tokens = nexttokens;
8248         tokcount = nexttokcount;
8249     } // while
8250 
8251     if (!ctab.seen) { fail(ctx, "No CTAB block in preshader."); return; }
8252     if (!prsi.seen) { fail(ctx, "No PRSI block in preshader."); return; }
8253     if (!fxlc.seen) { fail(ctx, "No FXLC block in preshader."); return; }
8254     if (!clit.seen) { fail(ctx, "No CLIT block in preshader."); return; }
8255 
8256     MOJOSHADER_preshader *preshader = (MOJOSHADER_preshader *)
8257                                     Malloc(ctx, sizeof (MOJOSHADER_preshader));
8258     if (preshader == NULL)
8259         return;
8260     memset(preshader, '\0', sizeof (MOJOSHADER_preshader));
8261     ctx->preshader = preshader;
8262 
8263     // Let's set up the constant literals first...
8264     if (clit.tokcount == 0)
8265         fail(ctx, "Bogus CLIT block in preshader.");
8266     else
8267     {
8268         const uint32 lit_count = SWAP32(clit.tokens[1]);
8269         if (lit_count > ((clit.tokcount - 2) / 2))
8270         {
8271             fail(ctx, "Bogus CLIT block in preshader.");
8272             return;
8273         } // if
8274         else if (lit_count > 0)
8275         {
8276             preshader->literal_count = (unsigned int) lit_count;
8277             assert(sizeof (double) == 8);  // just in case.
8278             const size_t len = sizeof (double) * lit_count;
8279             preshader->literals = (double *) Malloc(ctx, len);
8280             if (preshader->literals == NULL)
8281                 return;  // oh well.
8282             const double *litptr = (const double *) (clit.tokens + 2);
8283             int i;
8284             for (i = 0; i < lit_count; i++)
8285                 preshader->literals[i] = SWAPDBL(litptr[i]);
8286         } // else if
8287     } // else
8288 
8289     // Parse out the PRSI block. This is used to map the output registers.
8290     if (prsi.tokcount < 8)
8291     {
8292         fail(ctx, "Bogus preshader PRSI data");
8293         return;
8294     } // if
8295 
8296     //const uint32 first_output_reg = SWAP32(prsi.tokens[1]);
8297     // !!! FIXME: there are a lot of fields here I don't know about.
8298     // !!! FIXME:  maybe [2] and [3] are for int4 and bool registers?
8299     //const uint32 output_reg_count = SWAP32(prsi.tokens[4]);
8300     // !!! FIXME:  maybe [5] and [6] are for int4 and bool registers?
8301     const uint32 output_map_count = SWAP32(prsi.tokens[7]);
8302 
8303     prsi.tokcount -= 8;
8304     prsi.tokens += 8;
8305 
8306     if (prsi.tokcount < ((output_map_count + 1) * 2))
8307     {
8308         fail(ctx, "Bogus preshader PRSI data");
8309         return;
8310     } // if
8311 
8312     const uint32 *output_map = prsi.tokens;
8313 
8314     // Now we'll figure out the CTAB...
8315     CtabData ctabdata = { 0, 0, 0 };
8316     parse_constant_table(ctx, ctab.tokens - 1, ctab.tokcount * 4,
8317                          okay_version, 0, &ctabdata);
8318 
8319     // preshader owns this now. Don't free it in this function.
8320     preshader->symbol_count = ctabdata.symbol_count;
8321     preshader->symbols = ctabdata.symbols;
8322 
8323     if (!ctabdata.have_ctab)
8324     {
8325         fail(ctx, "Bogus preshader CTAB data");
8326         return;
8327     } // if
8328 
8329     // The FXLC block has the actual instructions...
8330     uint32 opcode_count = SWAP32(fxlc.tokens[1]);
8331 
8332     size_t len = sizeof (MOJOSHADER_preshaderInstruction) * opcode_count;
8333     preshader->instruction_count = (unsigned int) opcode_count;
8334     preshader->instructions = (MOJOSHADER_preshaderInstruction *)
8335                                 Malloc(ctx, len);
8336     if (preshader->instructions == NULL)
8337         return;
8338     memset(preshader->instructions, '\0', len);
8339 
8340     fxlc.tokens += 2;
8341     fxlc.tokcount -= 2;
8342     if (opcode_count > (fxlc.tokcount / 2))
8343     {
8344         fail(ctx, "Bogus preshader FXLC block.");
8345         return;
8346     } // if
8347 
8348     MOJOSHADER_preshaderInstruction *inst = preshader->instructions;
8349     while (opcode_count--)
8350     {
8351         const uint32 opcodetok = SWAP32(fxlc.tokens[0]);
8352         MOJOSHADER_preshaderOpcode opcode = MOJOSHADER_PRESHADEROP_NOP;
8353         switch ((opcodetok >> 16) & 0xFFFF)
8354         {
8355             case 0x1000: opcode = MOJOSHADER_PRESHADEROP_MOV; break;
8356             case 0x1010: opcode = MOJOSHADER_PRESHADEROP_NEG; break;
8357             case 0x1030: opcode = MOJOSHADER_PRESHADEROP_RCP; break;
8358             case 0x1040: opcode = MOJOSHADER_PRESHADEROP_FRC; break;
8359             case 0x1050: opcode = MOJOSHADER_PRESHADEROP_EXP; break;
8360             case 0x1060: opcode = MOJOSHADER_PRESHADEROP_LOG; break;
8361             case 0x1070: opcode = MOJOSHADER_PRESHADEROP_RSQ; break;
8362             case 0x1080: opcode = MOJOSHADER_PRESHADEROP_SIN; break;
8363             case 0x1090: opcode = MOJOSHADER_PRESHADEROP_COS; break;
8364             case 0x10A0: opcode = MOJOSHADER_PRESHADEROP_ASIN; break;
8365             case 0x10B0: opcode = MOJOSHADER_PRESHADEROP_ACOS; break;
8366             case 0x10C0: opcode = MOJOSHADER_PRESHADEROP_ATAN; break;
8367             case 0x2000: opcode = MOJOSHADER_PRESHADEROP_MIN; break;
8368             case 0x2010: opcode = MOJOSHADER_PRESHADEROP_MAX; break;
8369             case 0x2020: opcode = MOJOSHADER_PRESHADEROP_LT; break;
8370             case 0x2030: opcode = MOJOSHADER_PRESHADEROP_GE; break;
8371             case 0x2040: opcode = MOJOSHADER_PRESHADEROP_ADD; break;
8372             case 0x2050: opcode = MOJOSHADER_PRESHADEROP_MUL; break;
8373             case 0x2060: opcode = MOJOSHADER_PRESHADEROP_ATAN2; break;
8374             case 0x2080: opcode = MOJOSHADER_PRESHADEROP_DIV; break;
8375             case 0x3000: opcode = MOJOSHADER_PRESHADEROP_CMP; break;
8376             case 0x3010: opcode = MOJOSHADER_PRESHADEROP_MOVC; break;
8377             case 0x5000: opcode = MOJOSHADER_PRESHADEROP_DOT; break;
8378             case 0x5020: opcode = MOJOSHADER_PRESHADEROP_NOISE; break;
8379             case 0xA000: opcode = MOJOSHADER_PRESHADEROP_MIN_SCALAR; break;
8380             case 0xA010: opcode = MOJOSHADER_PRESHADEROP_MAX_SCALAR; break;
8381             case 0xA020: opcode = MOJOSHADER_PRESHADEROP_LT_SCALAR; break;
8382             case 0xA030: opcode = MOJOSHADER_PRESHADEROP_GE_SCALAR; break;
8383             case 0xA040: opcode = MOJOSHADER_PRESHADEROP_ADD_SCALAR; break;
8384             case 0xA050: opcode = MOJOSHADER_PRESHADEROP_MUL_SCALAR; break;
8385             case 0xA060: opcode = MOJOSHADER_PRESHADEROP_ATAN2_SCALAR; break;
8386             case 0xA080: opcode = MOJOSHADER_PRESHADEROP_DIV_SCALAR; break;
8387             case 0xD000: opcode = MOJOSHADER_PRESHADEROP_DOT_SCALAR; break;
8388             case 0xD020: opcode = MOJOSHADER_PRESHADEROP_NOISE_SCALAR; break;
8389             default: fail(ctx, "Unknown preshader opcode."); break;
8390         } // switch
8391 
8392         uint32 operand_count = SWAP32(fxlc.tokens[1]) + 1;  // +1 for dest.
8393 
8394         inst->opcode = opcode;
8395         inst->element_count = (unsigned int) (opcodetok & 0xFF);
8396         inst->operand_count = (unsigned int) operand_count;
8397 
8398         fxlc.tokens += 2;
8399         fxlc.tokcount -= 2;
8400         if ((operand_count * 3) > fxlc.tokcount)
8401         {
8402             fail(ctx, "Bogus preshader FXLC block.");
8403             return;
8404         } // if
8405 
8406         MOJOSHADER_preshaderOperand *operand = inst->operands;
8407         while (operand_count--)
8408         {
8409             const unsigned int item = (unsigned int) SWAP32(fxlc.tokens[2]);
8410 
8411             // !!! FIXME: don't know what first token does.
8412             switch (SWAP32(fxlc.tokens[1]))
8413             {
8414                 case 1:  // literal from CLIT block.
8415                 {
8416                     if (item >= preshader->literal_count)
8417                     {
8418                         fail(ctx, "Bogus preshader literal index.");
8419                         break;
8420                     } // if
8421                     operand->type = MOJOSHADER_PRESHADEROPERAND_LITERAL;
8422                     break;
8423                 } // case
8424 
8425                 case 2:  // item from ctabdata.
8426                 {
8427                     int i;
8428                     MOJOSHADER_symbol *sym = ctabdata.symbols;
8429                     for (i = 0; i < ctabdata.symbol_count; i++, sym++)
8430                     {
8431                         const uint32 base = sym->register_index * 4;
8432                         const uint32 count = sym->register_count * 4;
8433                         assert(sym->register_set==MOJOSHADER_SYMREGSET_FLOAT4);
8434                         if ( (base <= item) && ((base + count) > item) )
8435                             break;
8436                     } // for
8437                     if (i == ctabdata.symbol_count)
8438                     {
8439                         fail(ctx, "Bogus preshader input index.");
8440                         break;
8441                     } // if
8442                     operand->type = MOJOSHADER_PRESHADEROPERAND_INPUT;
8443                     break;
8444                 } // case
8445 
8446                 case 4:
8447                 {
8448                     int i;
8449                     for (i = 0; i < output_map_count; i++)
8450                     {
8451                         const uint32 base = output_map[(i*2)] * 4;
8452                         const uint32 count = output_map[(i*2)+1] * 4;
8453                         if ( (base <= item) && ((base + count) > item) )
8454                             break;
8455                     } // for
8456                     if (i == output_map_count)
8457                     {
8458                         fail(ctx, "Bogus preshader output index.");
8459                         break;
8460                     } // if
8461 
8462                     operand->type = MOJOSHADER_PRESHADEROPERAND_OUTPUT;
8463                     break;
8464                 } // case
8465 
8466                 case 7:
8467                 {
8468                     operand->type = MOJOSHADER_PRESHADEROPERAND_TEMP;
8469                     if (item >= preshader->temp_count)
8470                         preshader->temp_count = item + 1;
8471                     break;
8472                 } // case
8473             } // switch
8474 
8475             operand->index = item;
8476 
8477             fxlc.tokens += 3;
8478             fxlc.tokcount -= 3;
8479             operand++;
8480         } // while
8481 
8482         inst++;
8483     } // while
8484 #endif
8485 } // parse_preshader
8486 
8487 
// If the current token is a comment, hand any preshader or constant table
//  inside it to the matching parser.
// Returns the number of tokens the comment occupies (its data plus the
//  comment token itself), or 0 if the current token isn't a comment.
static int parse_comment_token(Context *ctx)
{
    uint32 commenttoks = 0;
    if (!is_comment_token(ctx, *ctx->tokens, &commenttoks))
        return 0;  // not a comment token.

    // Only peek inside if there's data, and the stream says it holds
    //  more tokens than the comment claims.
    const int peek = ((commenttoks >= 1) && (commenttoks < ctx->tokencount));
    if (peek)
    {
        const uint32 id = SWAP32(ctx->tokens[1]);
        if (id == CTAB_ID)
        {
            parse_constant_table(ctx, ctx->tokens, commenttoks * 4,
                                 ctx->version_token, 1, &ctx->ctab);
        } // if
        else if (id == PRES_ID)
            parse_preshader(ctx, commenttoks);
    } // if

    return commenttoks + 1;  // comment data plus the initial token.
} // parse_comment_token
8509 
8510 
// Handle the end token, which is always 0x0000FFFF and must be the very
//  last token in the stream. Runs the profile's end emitter unless the
//  context has failed.
// Returns 1 if the current token was the end token, 0 (eating nothing) if
//  it wasn't.
static int parse_end_token(Context *ctx)
{
    const uint32 token = SWAP32(*(ctx->tokens));
    if (token == 0x0000FFFF)
    {
        if (ctx->tokencount != 1)  // we _must_ be last. If not: fail.
            fail(ctx, "end token before end of stream");

        if (!isfail(ctx))
            ctx->profile->end_emitter(ctx);

        return 1;
    } // if

    return 0;  // not us, eat no tokens.
} // parse_end_token
8524 
8525 
// Handle the phase token, which is always 0x0000FFFD and is only legal in
//  1.4 pixel shaders. Runs the profile's phase emitter unless the context
//  has failed.
// Returns 1 if the current token was the phase token, 0 (eating nothing)
//  if it wasn't.
static int parse_phase_token(Context *ctx)
{
    // !!! FIXME: needs state; allow only one phase token per shader, I think?
    const uint32 token = SWAP32(*(ctx->tokens));
    if (token == 0x0000FFFD)
    {
        if (!(shader_is_pixel(ctx) && shader_version_exactly(ctx, 1, 4)))
            fail(ctx, "phase token only available in 1.4 pixel shaders");

        if (!isfail(ctx))
            ctx->profile->phase_emitter(ctx);

        return 1;
    } // if

    return 0;  // not us, eat no tokens.
} // parse_phase_token
8540 
8541 
// Parse whatever sits at the current token position by offering it to each
//  token parser in turn: comment, end, phase, then instruction.
// Returns the number of tokens consumed. An unrecognized token (or running
//  out of tokens) is reported through fail() and counts as one token, so
//  the caller always moves forward.
static int parse_token(Context *ctx)
{
    int rc = 0;

    assert(ctx->output_stack_len == 0);

    if (ctx->tokencount == 0)
    {
        // Nothing left to look at: bail here instead of falling through
        //  to the "unknown token" report, which reads *ctx->tokens.
        fail(ctx, "unexpected end of shader.");
        return 1;
    } // if

    if ((rc = parse_comment_token(ctx)) != 0)
        return rc;

    if ((rc = parse_end_token(ctx)) != 0)
        return rc;

    if ((rc = parse_phase_token(ctx)) != 0)
        return rc;

    if ((rc = parse_instruction_token(ctx)) != 0)
        return rc;

    // Swap it like every other token read, so the value reported doesn't
    //  depend on the host's byte order.
    failf(ctx, "unknown token (0x%x)", (uint) SWAP32(*ctx->tokens));
    return 1;  // good luck!
} // parse_token
8566 
8567 
find_profile_id(const char * profile)8568 static int find_profile_id(const char *profile)
8569 {
8570     size_t i;
8571     for (i = 0; i < STATICARRAYLEN(profileMap); i++)
8572     {
8573         const char *name = profileMap[i].from;
8574         if (strcmp(name, profile) == 0)
8575         {
8576             profile = profileMap[i].to;
8577             break;
8578         } // if
8579     } // for
8580 
8581     for (i = 0; i < STATICARRAYLEN(profiles); i++)
8582     {
8583         const char *name = profiles[i].name;
8584         if (strcmp(name, profile) == 0)
8585             return i;
8586     } // for
8587 
8588     return -1;  // no match.
8589 } // find_profile_id
8590 
8591 
build_context(const char * profile,const unsigned char * tokenbuf,const unsigned int bufsize,const MOJOSHADER_swizzle * swiz,const unsigned int swizcount,const MOJOSHADER_samplerMap * smap,const unsigned int smapcount,MOJOSHADER_malloc m,MOJOSHADER_free f,void * d)8592 static Context *build_context(const char *profile,
8593                               const unsigned char *tokenbuf,
8594                               const unsigned int bufsize,
8595                               const MOJOSHADER_swizzle *swiz,
8596                               const unsigned int swizcount,
8597                               const MOJOSHADER_samplerMap *smap,
8598                               const unsigned int smapcount,
8599                               MOJOSHADER_malloc m, MOJOSHADER_free f, void *d)
8600 {
8601     if (m == NULL) m = MOJOSHADER_internal_malloc;
8602     if (f == NULL) f = MOJOSHADER_internal_free;
8603 
8604     Context *ctx = (Context *) m(sizeof (Context), d);
8605     if (ctx == NULL)
8606         return NULL;
8607 
8608     memset(ctx, '\0', sizeof (Context));
8609     ctx->malloc = m;
8610     ctx->free = f;
8611     ctx->malloc_data = d;
8612     ctx->tokens = (const uint32 *) tokenbuf;
8613     ctx->orig_tokens = (const uint32 *) tokenbuf;
8614     ctx->tokencount = bufsize / sizeof (uint32);
8615     ctx->swizzles = swiz;
8616     ctx->swizzles_count = swizcount;
8617     ctx->samplermap = smap;
8618     ctx->samplermap_count = smapcount;
8619     ctx->endline = ENDLINE_STR;
8620     ctx->endline_len = strlen(ctx->endline);
8621     ctx->last_address_reg_component = -1;
8622     ctx->current_position = MOJOSHADER_POSITION_BEFORE;
8623     ctx->texm3x2pad_dst0 = -1;
8624     ctx->texm3x2pad_src0 = -1;
8625     ctx->texm3x3pad_dst0 = -1;
8626     ctx->texm3x3pad_src0 = -1;
8627     ctx->texm3x3pad_dst1 = -1;
8628     ctx->texm3x3pad_src1 = -1;
8629 
8630     ctx->errors = errorlist_create(MallocBridge, FreeBridge, ctx);
8631     if (ctx->errors == NULL)
8632     {
8633         f(ctx, d);
8634         return NULL;
8635     } // if
8636 
8637     if (!set_output(ctx, &ctx->mainline))
8638     {
8639         errorlist_destroy(ctx->errors);
8640         f(ctx, d);
8641         return NULL;
8642     } // if
8643 
8644     const int profileid = find_profile_id(profile);
8645     ctx->profileid = profileid;
8646     if (profileid >= 0)
8647         ctx->profile = &profiles[profileid];
8648     else
8649         failf(ctx, "Profile '%s' is unknown or unsupported", profile);
8650 
8651     return ctx;
8652 } // build_context
8653 
8654 
// Release every node of a ConstantsList, starting at `item`, by handing
//  each one to the free callback `f` along with `d`.
static void free_constants_list(MOJOSHADER_free f, void *d, ConstantsList *item)
{
    ConstantsList *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // grab the link before the node goes away.
        f(item, d);
    } // for
} // free_constants_list
8664 
8665 
// Release every node of a VariableList, starting at `item`, by handing
//  each one to the free callback `f` along with `d`. Only the nodes
//  themselves are released here.
static void free_variable_list(MOJOSHADER_free f, void *d, VariableList *item)
{
    VariableList *following = NULL;
    for (; item != NULL; item = following)
    {
        following = item->next;  // grab the link before the node goes away.
        f(item, d);
    } // for
} // free_variable_list
8675 
8676 
// Release everything hanging off a symbol's type info: each member's name,
//  each member's own type info (recursively), and the member array itself.
//  The MOJOSHADER_symbolTypeInfo struct itself is not freed.
// A NULL member array is treated as "no members" whatever member_count
//  says, since a parse that bailed out early can leave the two disagreeing.
static void free_sym_typeinfo(MOJOSHADER_free f, void *d,
                              MOJOSHADER_symbolTypeInfo *typeinfo)
{
    unsigned int i;

    if (typeinfo->members == NULL)
        return;  // nothing was ever allocated here.

    for (i = 0; i < typeinfo->member_count; i++)
    {
        f((void *) typeinfo->members[i].name, d);
        free_sym_typeinfo(f, d, &typeinfo->members[i].info);
    } // for
    f((void *) typeinfo->members, d);
} // free_sym_typeinfo
8688 
8689 
// Release an array of `symcount` symbols: each symbol's name and type
//  info, then the array itself, all through the free callback `f`.
// A NULL array is ignored whatever `symcount` says, since a failed
//  allocation can leave a caller holding a count with no array behind it.
static void free_symbols(MOJOSHADER_free f, void *d, MOJOSHADER_symbol *syms,
                         const int symcount)
{
    int i;

    if (syms == NULL)
        return;  // nothing to walk, nothing to free.

    for (i = 0; i < symcount; i++)
    {
        f((void *) syms[i].name, d);
        free_sym_typeinfo(f, d, &syms[i].info);
    } // for
    f((void *) syms, d);
} // free_symbols
8701 
8702 
// Release a preshader and everything it owns: its literals, instructions
//  and symbols, then the struct itself. A NULL preshader is a no-op.
static void free_preshader(MOJOSHADER_free f, void *d,
                           MOJOSHADER_preshader *preshader)
{
    if (preshader == NULL)
        return;  // nothing to do.

    f((void *) preshader->literals, d);
    f((void *) preshader->instructions, d);
    free_symbols(f, d, preshader->symbols, preshader->symbol_count);
    f((void *) preshader, d);
} // free_preshader
8714 
8715 
// Tear down a Context: its output buffers, register/constant/variable
//  lists, error list, constant table symbols and preshader, and finally
//  the Context itself. A NULL context is a no-op.
// Uses the context's own free callback, or the internal default if the
//  context doesn't have one.
static void destroy_context(Context *ctx)
{
    if (ctx == NULL)
        return;  // nothing to do.

    MOJOSHADER_free f = ctx->free;
    if (f == NULL)
        f = MOJOSHADER_internal_free;
    void *d = ctx->malloc_data;

    // Output buffers.
    buffer_destroy(ctx->preflight);
    buffer_destroy(ctx->globals);
    buffer_destroy(ctx->helpers);
    buffer_destroy(ctx->subroutines);
    buffer_destroy(ctx->mainline_intro);
    buffer_destroy(ctx->mainline);
    buffer_destroy(ctx->ignore);

    // Linked lists. The register lists are freed from their first
    //  `next` node on.
    free_constants_list(f, d, ctx->constants);
    free_reglist(f, d, ctx->used_registers.next);
    free_reglist(f, d, ctx->defined_registers.next);
    free_reglist(f, d, ctx->uniforms.next);
    free_reglist(f, d, ctx->attributes.next);
    free_reglist(f, d, ctx->samplers.next);
    free_variable_list(f, d, ctx->variables);

    // Everything else, then the context itself, last.
    errorlist_destroy(ctx->errors);
    free_symbols(f, d, ctx->ctab.symbols, ctx->ctab.symbol_count);
    free_preshader(f, d, ctx->preshader);
    f(ctx, d);
} // destroy_context
8742 
8743 
// Merge the context's output sections, in emission order, into a single
//  allocation via buffer_merge(). The merged size is reported through (len).
static char *build_output(Context *ctx, size_t *len)
{
    // ctx->ignore is left out on purpose: whatever was written there is
    //  meant to be discarded.
    Buffer *sections[] = {
        ctx->preflight,
        ctx->globals,
        ctx->helpers,
        ctx->subroutines,
        ctx->mainline_intro,
        ctx->mainline
    };

    return buffer_merge(sections, STATICARRAYLEN(sections), len);
} // build_output
8755 
8756 
// Ask the active profile for the variable name matching register (reg).
//  Callers in this file store the result in parseData and later free it.
static inline const char *alloc_varname(Context *ctx, const RegisterList *reg)
{
    const RegisterType regtype = reg->regtype;
    const int regnum = reg->regnum;
    return ctx->profile->get_varname(ctx, regtype, regnum);
} // alloc_varname
8761 
8762 
8763 // !!! FIXME: this code is sort of hard to follow:
8764 // !!! FIXME:  "var->used" only applies to arrays (at the moment, at least,
8765 // !!! FIXME:  but this might be buggy at a later time?), and this code
8766 // !!! FIXME:  relies on that.
8767 // !!! FIXME: "variables" means "things we found in a CTAB" but it's not
8768 // !!! FIXME:  all registers, etc.
8769 // !!! FIXME: "const_array" means an array for d3d "const" registers (c0, c1,
8770 // !!! FIXME:  etc), but not a constant array, although they _can_ be.
8771 // !!! FIXME: It's just a mess.  :/
build_uniforms(Context * ctx)8772 static MOJOSHADER_uniform *build_uniforms(Context *ctx)
8773 {
8774     const size_t len = sizeof (MOJOSHADER_uniform) * ctx->uniform_count;
8775     MOJOSHADER_uniform *retval = (MOJOSHADER_uniform *) Malloc(ctx, len);
8776 
8777     if (retval != NULL)
8778     {
8779         MOJOSHADER_uniform *wptr = retval;
8780         memset(wptr, '\0', len);
8781 
8782         VariableList *var;
8783         int written = 0;
8784         for (var = ctx->variables; var != NULL; var = var->next)
8785         {
8786             if (var->used)
8787             {
8788                 const char *name = ctx->profile->get_const_array_varname(ctx,
8789                                                       var->index, var->count);
8790                 if (name != NULL)
8791                 {
8792                     wptr->type = MOJOSHADER_UNIFORM_FLOAT;
8793                     wptr->index = var->index;
8794                     wptr->array_count = var->count;
8795                     wptr->constant = (var->constant != NULL) ? 1 : 0;
8796                     wptr->name = name;
8797                     wptr++;
8798                     written++;
8799                 } // if
8800             } // if
8801         } // for
8802 
8803         RegisterList *item = ctx->uniforms.next;
8804         MOJOSHADER_uniformType type = MOJOSHADER_UNIFORM_FLOAT;
8805         while (written < ctx->uniform_count)
8806         {
8807             int skip = 0;
8808 
8809             // !!! FIXME: does this fail if written > ctx->uniform_count?
8810             if (item == NULL)
8811             {
8812                 fail(ctx, "BUG: mismatched uniform list and count");
8813                 break;
8814             } // if
8815 
8816             int index = item->regnum;
8817             switch (item->regtype)
8818             {
8819                 case REG_TYPE_CONST:
8820                     skip = (item->array != NULL);
8821                     type = MOJOSHADER_UNIFORM_FLOAT;
8822                     break;
8823 
8824                 case REG_TYPE_CONSTINT:
8825                     type = MOJOSHADER_UNIFORM_INT;
8826                     break;
8827 
8828                 case REG_TYPE_CONSTBOOL:
8829                     type = MOJOSHADER_UNIFORM_BOOL;
8830                     break;
8831 
8832                 default:
8833                     fail(ctx, "unknown uniform datatype");
8834                     break;
8835             } // switch
8836 
8837             if (!skip)
8838             {
8839                 wptr->type = type;
8840                 wptr->index = index;
8841                 wptr->array_count = 0;
8842                 wptr->name = alloc_varname(ctx, item);
8843                 wptr++;
8844                 written++;
8845             } // if
8846 
8847             item = item->next;
8848         } // for
8849     } // if
8850 
8851     return retval;
8852 } // build_uniforms
8853 
8854 
build_constants(Context * ctx)8855 static MOJOSHADER_constant *build_constants(Context *ctx)
8856 {
8857     const size_t len = sizeof (MOJOSHADER_constant) * ctx->constant_count;
8858     MOJOSHADER_constant *retval = (MOJOSHADER_constant *) Malloc(ctx, len);
8859 
8860     if (retval != NULL)
8861     {
8862         ConstantsList *item = ctx->constants;
8863         int i;
8864 
8865         for (i = 0; i < ctx->constant_count; i++)
8866         {
8867             if (item == NULL)
8868             {
8869                 fail(ctx, "BUG: mismatched constant list and count");
8870                 break;
8871             } // if
8872 
8873             memcpy(&retval[i], &item->constant, sizeof (MOJOSHADER_constant));
8874             item = item->next;
8875         } // for
8876     } // if
8877 
8878     return retval;
8879 } // build_constants
8880 
8881 
build_samplers(Context * ctx)8882 static MOJOSHADER_sampler *build_samplers(Context *ctx)
8883 {
8884     const size_t len = sizeof (MOJOSHADER_sampler) * ctx->sampler_count;
8885     MOJOSHADER_sampler *retval = (MOJOSHADER_sampler *) Malloc(ctx, len);
8886 
8887     if (retval != NULL)
8888     {
8889         RegisterList *item = ctx->samplers.next;
8890         int i;
8891 
8892         memset(retval, '\0', len);
8893 
8894         for (i = 0; i < ctx->sampler_count; i++)
8895         {
8896             if (item == NULL)
8897             {
8898                 fail(ctx, "BUG: mismatched sampler list and count");
8899                 break;
8900             } // if
8901 
8902             assert(item->regtype == REG_TYPE_SAMPLER);
8903             retval[i].type = cvtD3DToMojoSamplerType((TextureType) item->index);
8904             retval[i].index = item->regnum;
8905             retval[i].name = alloc_varname(ctx, item);
8906             retval[i].texbem = (item->misc != 0) ? 1 : 0;
8907             item = item->next;
8908         } // for
8909     } // if
8910 
8911     return retval;
8912 } // build_samplers
8913 
8914 
build_attributes(Context * ctx,int * _count)8915 static MOJOSHADER_attribute *build_attributes(Context *ctx, int *_count)
8916 {
8917     int count = 0;
8918 
8919     if (ctx->attribute_count == 0)
8920     {
8921         *_count = 0;
8922         return NULL;  // nothing to do.
8923     } // if
8924 
8925     const size_t len = sizeof (MOJOSHADER_attribute) * ctx->attribute_count;
8926     MOJOSHADER_attribute *retval = (MOJOSHADER_attribute *) Malloc(ctx, len);
8927 
8928     if (retval != NULL)
8929     {
8930         RegisterList *item = ctx->attributes.next;
8931         MOJOSHADER_attribute *wptr = retval;
8932         int ignore = 0;
8933         int i;
8934 
8935         memset(retval, '\0', len);
8936 
8937         for (i = 0; i < ctx->attribute_count; i++)
8938         {
8939             if (item == NULL)
8940             {
8941                 fail(ctx, "BUG: mismatched attribute list and count");
8942                 break;
8943             } // if
8944 
8945             switch (item->regtype)
8946             {
8947                 case REG_TYPE_RASTOUT:
8948                 case REG_TYPE_ATTROUT:
8949                 case REG_TYPE_TEXCRDOUT:
8950                 case REG_TYPE_COLOROUT:
8951                 case REG_TYPE_DEPTHOUT:
8952                     ignore = 1;
8953                     break;
8954                 case REG_TYPE_TEXTURE:
8955                 case REG_TYPE_MISCTYPE:
8956                 case REG_TYPE_INPUT:
8957                     ignore = shader_is_pixel(ctx);
8958                     break;
8959                 default:
8960                     ignore = 0;
8961                     break;
8962             } // switch
8963 
8964             if (!ignore)
8965             {
8966                 if (shader_is_pixel(ctx))
8967                     fail(ctx, "BUG: pixel shader with vertex attributes");
8968                 else
8969                 {
8970                     wptr->usage = item->usage;
8971                     wptr->index = item->index;
8972                     wptr->name = alloc_varname(ctx, item);
8973                     wptr++;
8974                     count++;
8975                 } // else
8976             } // if
8977 
8978             item = item->next;
8979         } // for
8980     } // if
8981 
8982     *_count = count;
8983     return retval;
8984 } // build_attributes
8985 
build_outputs(Context * ctx,int * _count)8986 static MOJOSHADER_attribute *build_outputs(Context *ctx, int *_count)
8987 {
8988     int count = 0;
8989 
8990     if (ctx->attribute_count == 0)
8991     {
8992         *_count = 0;
8993         return NULL;  // nothing to do.
8994     } // if
8995 
8996     const size_t len = sizeof (MOJOSHADER_attribute) * ctx->attribute_count;
8997     MOJOSHADER_attribute *retval = (MOJOSHADER_attribute *) Malloc(ctx, len);
8998 
8999     if (retval != NULL)
9000     {
9001         RegisterList *item = ctx->attributes.next;
9002         MOJOSHADER_attribute *wptr = retval;
9003         int i;
9004 
9005         memset(retval, '\0', len);
9006 
9007         for (i = 0; i < ctx->attribute_count; i++)
9008         {
9009             if (item == NULL)
9010             {
9011                 fail(ctx, "BUG: mismatched attribute list and count");
9012                 break;
9013             } // if
9014 
9015             switch (item->regtype)
9016             {
9017                 case REG_TYPE_RASTOUT:
9018                 case REG_TYPE_ATTROUT:
9019                 case REG_TYPE_TEXCRDOUT:
9020                 case REG_TYPE_COLOROUT:
9021                 case REG_TYPE_DEPTHOUT:
9022                     wptr->usage = item->usage;
9023                     wptr->index = item->index;
9024                     wptr->name = alloc_varname(ctx, item);
9025                     wptr++;
9026                     count++;
9027                     break;
9028                 default:
9029                     break;
9030             } // switch
9031 
9032 
9033             item = item->next;
9034         } // for
9035     } // if
9036 
9037     *_count = count;
9038     return retval;
9039 } // build_outputs
9040 
9041 
build_parsedata(Context * ctx)9042 static MOJOSHADER_parseData *build_parsedata(Context *ctx)
9043 {
9044     char *output = NULL;
9045     MOJOSHADER_constant *constants = NULL;
9046     MOJOSHADER_uniform *uniforms = NULL;
9047     MOJOSHADER_attribute *attributes = NULL;
9048     MOJOSHADER_attribute *outputs = NULL;
9049     MOJOSHADER_sampler *samplers = NULL;
9050     MOJOSHADER_swizzle *swizzles = NULL;
9051     MOJOSHADER_error *errors = NULL;
9052     MOJOSHADER_parseData *retval = NULL;
9053     size_t output_len = 0;
9054     int attribute_count = 0;
9055     int output_count = 0;
9056 
9057     if (ctx->out_of_memory)
9058         return &MOJOSHADER_out_of_mem_data;
9059 
9060     retval = (MOJOSHADER_parseData*) Malloc(ctx, sizeof(MOJOSHADER_parseData));
9061     if (retval == NULL)
9062         return &MOJOSHADER_out_of_mem_data;
9063 
9064     memset(retval, '\0', sizeof (MOJOSHADER_parseData));
9065 
9066     if (!isfail(ctx))
9067         output = build_output(ctx, &output_len);
9068 
9069     if (!isfail(ctx))
9070         constants = build_constants(ctx);
9071 
9072     if (!isfail(ctx))
9073         uniforms = build_uniforms(ctx);
9074 
9075     if (!isfail(ctx))
9076         attributes = build_attributes(ctx, &attribute_count);
9077 
9078     if (!isfail(ctx))
9079         outputs = build_outputs(ctx, &output_count);
9080 
9081     if (!isfail(ctx))
9082         samplers = build_samplers(ctx);
9083 
9084     const int error_count = errorlist_count(ctx->errors);
9085     errors = errorlist_flatten(ctx->errors);
9086 
9087     if (!isfail(ctx))
9088     {
9089         if (ctx->swizzles_count > 0)
9090         {
9091             const int len = ctx->swizzles_count * sizeof (MOJOSHADER_swizzle);
9092             swizzles = (MOJOSHADER_swizzle *) Malloc(ctx, len);
9093             if (swizzles != NULL)
9094                 memcpy(swizzles, ctx->swizzles, len);
9095         } // if
9096     } // if
9097 
9098     // check again, in case build_output, etc, ran out of memory.
9099     if (isfail(ctx))
9100     {
9101         int i;
9102 
9103         Free(ctx, output);
9104         Free(ctx, constants);
9105         Free(ctx, swizzles);
9106 
9107         if (uniforms != NULL)
9108         {
9109             for (i = 0; i < ctx->uniform_count; i++)
9110                 Free(ctx, (void *) uniforms[i].name);
9111             Free(ctx, uniforms);
9112         } // if
9113 
9114         if (attributes != NULL)
9115         {
9116             for (i = 0; i < attribute_count; i++)
9117                 Free(ctx, (void *) attributes[i].name);
9118             Free(ctx, attributes);
9119         } // if
9120 
9121         if (outputs != NULL)
9122         {
9123             for (i = 0; i < output_count; i++)
9124                 Free(ctx, (void *) outputs[i].name);
9125             Free(ctx, outputs);
9126         } // if
9127 
9128         if (samplers != NULL)
9129         {
9130             for (i = 0; i < ctx->sampler_count; i++)
9131                 Free(ctx, (void *) samplers[i].name);
9132             Free(ctx, samplers);
9133         } // if
9134 
9135         if (ctx->out_of_memory)
9136         {
9137             for (i = 0; i < error_count; i++)
9138             {
9139                 Free(ctx, (void *) errors[i].filename);
9140                 Free(ctx, (void *) errors[i].error);
9141             } // for
9142             Free(ctx, errors);
9143             Free(ctx, retval);
9144             return &MOJOSHADER_out_of_mem_data;
9145         } // if
9146     } // if
9147     else
9148     {
9149         retval->profile = ctx->profile->name;
9150         retval->output = output;
9151         retval->output_len = (int) output_len;
9152         retval->instruction_count = ctx->instruction_count;
9153         retval->shader_type = ctx->shader_type;
9154         retval->major_ver = (int) ctx->major_ver;
9155         retval->minor_ver = (int) ctx->minor_ver;
9156         retval->uniform_count = ctx->uniform_count;
9157         retval->uniforms = uniforms;
9158         retval->constant_count = ctx->constant_count;
9159         retval->constants = constants;
9160         retval->sampler_count = ctx->sampler_count;
9161         retval->samplers = samplers;
9162         retval->attribute_count = attribute_count;
9163         retval->attributes = attributes;
9164         retval->output_count = output_count;
9165         retval->outputs = outputs;
9166         retval->swizzle_count = ctx->swizzles_count;
9167         retval->swizzles = swizzles;
9168         retval->symbol_count = ctx->ctab.symbol_count;
9169         retval->symbols = ctx->ctab.symbols;
9170         retval->preshader = ctx->preshader;
9171 
9172         // we don't own these now, retval does.
9173         ctx->ctab.symbols = NULL;
9174         ctx->preshader = NULL;
9175         ctx->ctab.symbol_count = 0;
9176     } // else
9177 
9178     retval->error_count = error_count;
9179     retval->errors = errors;
9180     retval->malloc = (ctx->malloc == MOJOSHADER_internal_malloc) ? NULL : ctx->malloc;
9181     retval->free = (ctx->free == MOJOSHADER_internal_free) ? NULL : ctx->free;
9182     retval->malloc_data = ctx->malloc_data;
9183 
9184     return retval;
9185 } // build_parsedata
9186 
9187 
// Walk every register the shader used and make sure each one is accounted
//  for: undeclared output (and Shader Model 1 pixel input) registers are
//  added to the attribute list, temporaries and similar are emitted as
//  globals, and constant registers are moved from ctx->used_registers to
//  ctx->uniforms. After that, the arrays, uniforms, samplers and attributes
//  are emitted through the profile and counted. The build_* functions later
//  allocate their arrays from those counts, so the counts must match what
//  those functions write.
static void process_definitions(Context *ctx)
{
    // !!! FIXME: apparently, pre ps_3_0, sampler registers don't need to be
    // !!! FIXME:  DCL'd before use (default to 2d?). We aren't checking
    // !!! FIXME:  this at the moment, though.

    determine_constants_arrays(ctx);  // in case this hasn't been called yet.

    RegisterList *uitem = &ctx->uniforms;
    RegisterList *prev = &ctx->used_registers;
    RegisterList *item = prev->next;

    while (item != NULL)
    {
        RegisterList *next = item->next;
        const RegisterType regtype = item->regtype;
        const int regnum = item->regnum;

        if (!get_defined_register(ctx, regtype, regnum))
        {
            // haven't already dealt with this one.
            switch (regtype)
            {
                // !!! FIXME: I'm not entirely sure this is right...
                case REG_TYPE_RASTOUT:
                case REG_TYPE_ATTROUT:
                case REG_TYPE_TEXCRDOUT:
                case REG_TYPE_COLOROUT:
                case REG_TYPE_DEPTHOUT:
                    if (shader_is_vertex(ctx)&&shader_version_atleast(ctx,3,0))
                    {
                        fail(ctx, "vs_3 can't use output registers"
                                  " without declaring them first.");
                        return;
                    } // if

                    // Apparently this is an attribute that wasn't DCL'd.
                    //  Add it to the attribute list; deal with it later.
                    add_attribute_register(ctx, regtype, regnum,
                                           MOJOSHADER_USAGE_UNKNOWN, 0, 0xF, 0);
                    break;

                case REG_TYPE_ADDRESS:
                case REG_TYPE_PREDICATE:
                case REG_TYPE_TEMP:
                case REG_TYPE_LOOP:
                case REG_TYPE_LABEL:
                    ctx->profile->global_emitter(ctx, regtype, regnum);
                    break;

                case REG_TYPE_CONST:
                case REG_TYPE_CONSTINT:
                case REG_TYPE_CONSTBOOL:
                    // separate uniforms into a different list for now.
                    //  Unlink from used_registers, append to uniforms...
                    prev->next = next;
                    item->next = NULL;
                    uitem->next = item;
                    uitem = item;
                    // ...and make the "prev = item" below a no-op, since
                    //  this node is no longer in the list we're walking.
                    item = prev;
                    break;

                case REG_TYPE_INPUT:
                    // You don't have to dcl_ your inputs in Shader Model 1.
                    if (shader_is_pixel(ctx)&&!shader_version_atleast(ctx,2,0))
                    {
                        add_attribute_register(ctx, regtype, regnum,
                                               MOJOSHADER_USAGE_COLOR, regnum,
                                               0xF, 0);
                        break;
                    } // if
                    // fall through...

                default:
                    fail(ctx, "BUG: we used a register we don't know how to define.");
            } // switch
        } // if

        prev = item;
        item = next;
    } // while

    // okay, now deal with uniform/constant arrays...
    VariableList *var;
    for (var = ctx->variables; var != NULL; var = var->next)
    {
        if (var->used)
        {
            if (var->constant)
            {
                ctx->profile->const_array_emitter(ctx, var->constant,
                                                  var->index, var->count);
            } // if
            else
            {
                ctx->profile->array_emitter(ctx, var);
                ctx->uniform_float4_count += var->count;
            } // else

            // build_uniforms() writes an entry for every used array,
            //  constant or not, into an array sized by uniform_count, so
            //  both kinds have to be counted here.
            ctx->uniform_count++;
        } // if
    } // for

    // ...and uniforms...
    for (item = ctx->uniforms.next; item != NULL; item = item->next)
    {
        int arraysize = -1;

        // Start clean for each register: only a REG_TYPE_CONST register
        //  found inside a used array should hand an array to the emitter.
        //  Without this, (var) could still point at an array matched by a
        //  previous register.
        var = NULL;

        // check if this is a register contained in an array...
        if (item->regtype == REG_TYPE_CONST)
        {
            for (var = ctx->variables; var != NULL; var = var->next)
            {
                if (!var->used)
                    continue;

                const int regnum = item->regnum;
                const int lo = var->index;
                if ( (regnum >= lo) && (regnum < (lo + var->count)) )
                {
                    assert(!var->constant);
                    item->array = var;  // used when building parseData.
                    arraysize = var->count;
                    break;
                } // if
            } // for
        } // if

        // (var) is the containing array, or NULL if there isn't one.
        ctx->profile->uniform_emitter(ctx, item->regtype, item->regnum, var);

        if (arraysize < 0)  // not part of an array?
        {
            ctx->uniform_count++;
            switch (item->regtype)
            {
                case REG_TYPE_CONST: ctx->uniform_float4_count++; break;
                case REG_TYPE_CONSTINT: ctx->uniform_int4_count++; break;
                case REG_TYPE_CONSTBOOL: ctx->uniform_bool_count++; break;
                default: break;
            } // switch
        } // if
    } // for

    // ...and samplers...
    for (item = ctx->samplers.next; item != NULL; item = item->next)
    {
        ctx->sampler_count++;
        ctx->profile->sampler_emitter(ctx, item->regnum,
                                      (TextureType) item->index,
                                      item->misc != 0);
    } // for

    // ...and attributes...
    for (item = ctx->attributes.next; item != NULL; item = item->next)
    {
        ctx->attribute_count++;
        ctx->profile->attribute_emitter(ctx, item->regtype, item->regnum,
                                        item->usage, item->index,
                                        item->writemask, item->misc);
    } // for
} // process_definitions
9347 
9348 
// Check the swizzle table held by the context: each of an entry's four
//  components must be in the range 0 to 3. The first bad component found
//  triggers a single fail() and stops the check.
static void verify_swizzles(Context *ctx)
{
    size_t which;

    for (which = 0; which < ctx->swizzles_count; which++)
    {
        const MOJOSHADER_swizzle *entry = &ctx->swizzles[which];
        int component;

        for (component = 0; component < 4; component++)
        {
            if (entry->swizzles[component] > 3)
            {
                fail(ctx, "invalid swizzle");
                return;
            }
        }
    }
} // verify_swizzles
9362 
9363 
9364 // API entry point...
9365 
9366 // !!! FIXME:
9367 // MSDN: "Shader validation will fail CreatePixelShader on any shader that
9368 //  attempts to read from a temporary register that has not been written by a
9369 //  previous instruction."  (true for ps_1_*, maybe others). Check this.
9370 
MOJOSHADER_parse(const char * profile,const unsigned char * tokenbuf,const unsigned int bufsize,const MOJOSHADER_swizzle * swiz,const unsigned int swizcount,const MOJOSHADER_samplerMap * smap,const unsigned int smapcount,MOJOSHADER_malloc m,MOJOSHADER_free f,void * d)9371 const MOJOSHADER_parseData *MOJOSHADER_parse(const char *profile,
9372                                              const unsigned char *tokenbuf,
9373                                              const unsigned int bufsize,
9374                                              const MOJOSHADER_swizzle *swiz,
9375                                              const unsigned int swizcount,
9376                                              const MOJOSHADER_samplerMap *smap,
9377                                              const unsigned int smapcount,
9378                                              MOJOSHADER_malloc m,
9379                                              MOJOSHADER_free f, void *d)
9380 {
9381     MOJOSHADER_parseData *retval = NULL;
9382     Context *ctx = NULL;
9383     int rc = 0;
9384     int failed = 0;
9385 
9386     if ( ((m == NULL) && (f != NULL)) || ((m != NULL) && (f == NULL)) )
9387         return &MOJOSHADER_out_of_mem_data;  // supply both or neither.
9388 
9389     ctx = build_context(profile, tokenbuf, bufsize, swiz, swizcount,
9390                         smap, smapcount, m, f, d);
9391     if (ctx == NULL)
9392         return &MOJOSHADER_out_of_mem_data;
9393 
9394     if (isfail(ctx))
9395     {
9396         retval = build_parsedata(ctx);
9397         destroy_context(ctx);
9398         return retval;
9399     } // if
9400 
9401     verify_swizzles(ctx);
9402 
9403     // Version token always comes first.
9404     ctx->current_position = 0;
9405     rc = parse_version_token(ctx, profile);
9406 
9407     // drop out now if this definitely isn't bytecode. Saves lots of
9408     //  meaningless errors flooding through.
9409     if (rc < 0)
9410     {
9411         retval = build_parsedata(ctx);
9412         destroy_context(ctx);
9413         return retval;
9414     } // if
9415 
9416     if ( ((uint32) rc) > ctx->tokencount )
9417     {
9418         fail(ctx, "Corrupted or truncated shader");
9419         ctx->tokencount = rc;
9420     } // if
9421 
9422     adjust_token_position(ctx, rc);
9423 
9424     // parse out the rest of the tokens after the version token...
9425     while (ctx->tokencount > 0)
9426     {
9427         // reset for each token.
9428         if (isfail(ctx))
9429         {
9430             failed = 1;
9431             ctx->isfail = 0;
9432         } // if
9433 
9434         rc = parse_token(ctx);
9435         if ( ((uint32) rc) > ctx->tokencount )
9436         {
9437             fail(ctx, "Corrupted or truncated shader");
9438             break;
9439         } // if
9440 
9441         adjust_token_position(ctx, rc);
9442     } // while
9443 
9444     ctx->current_position = MOJOSHADER_POSITION_AFTER;
9445 
9446     // for ps_1_*, the output color is written to r0...throw an
9447     //  error if this register was never written. This isn't
9448     //  important for vertex shaders, or shader model 2+.
9449     if (shader_is_pixel(ctx) && !shader_version_atleast(ctx, 2, 0))
9450     {
9451         if (!register_was_written(ctx, REG_TYPE_TEMP, 0))
9452             fail(ctx, "r0 (pixel shader 1.x color output) never written to");
9453     } // if
9454 
9455     if (!failed)
9456     {
9457         process_definitions(ctx);
9458         failed = isfail(ctx);
9459     } // if
9460 
9461     if (!failed)
9462         ctx->profile->finalize_emitter(ctx);
9463 
9464     ctx->isfail = failed;
9465     retval = build_parsedata(ctx);
9466     destroy_context(ctx);
9467     return retval;
9468 } // MOJOSHADER_parse
9469 
9470 
MOJOSHADER_freeParseData(const MOJOSHADER_parseData * _data)9471 void MOJOSHADER_freeParseData(const MOJOSHADER_parseData *_data)
9472 {
9473     MOJOSHADER_parseData *data = (MOJOSHADER_parseData *) _data;
9474     if ((data == NULL) || (data == &MOJOSHADER_out_of_mem_data))
9475         return;  // no-op.
9476 
9477     MOJOSHADER_free f = (data->free == NULL) ? MOJOSHADER_internal_free : data->free;
9478     void *d = data->malloc_data;
9479     int i;
9480 
9481     // we don't f(data->profile), because that's internal static data.
9482 
9483     f((void *) data->output, d);
9484     f((void *) data->constants, d);
9485     f((void *) data->swizzles, d);
9486 
9487     for (i = 0; i < data->error_count; i++)
9488     {
9489         f((void *) data->errors[i].error, d);
9490         f((void *) data->errors[i].filename, d);
9491     } // for
9492     f((void *) data->errors, d);
9493 
9494     for (i = 0; i < data->uniform_count; i++)
9495         f((void *) data->uniforms[i].name, d);
9496     f((void *) data->uniforms, d);
9497 
9498     for (i = 0; i < data->attribute_count; i++)
9499         f((void *) data->attributes[i].name, d);
9500     f((void *) data->attributes, d);
9501 
9502     for (i = 0; i < data->output_count; i++)
9503         f((void *) data->outputs[i].name, d);
9504     f((void *) data->outputs, d);
9505 
9506     for (i = 0; i < data->sampler_count; i++)
9507         f((void *) data->samplers[i].name, d);
9508     f((void *) data->samplers, d);
9509 
9510     free_symbols(f, d, data->symbols, data->symbol_count);
9511     free_preshader(f, d, data->preshader);
9512 
9513     f(data, d);
9514 } // MOJOSHADER_freeParseData
9515 
9516 
MOJOSHADER_version(void)9517 int MOJOSHADER_version(void)
9518 {
9519     return MOJOSHADER_VERSION;
9520 } // MOJOSHADER_version
9521 
9522 
MOJOSHADER_changeset(void)9523 const char *MOJOSHADER_changeset(void)
9524 {
9525     return MOJOSHADER_CHANGESET;
9526 } // MOJOSHADER_changeset
9527 
9528 
MOJOSHADER_maxShaderModel(const char * profile)9529 int MOJOSHADER_maxShaderModel(const char *profile)
9530 {
9531     #define PROFILE_SHADER_MODEL(p,v) if (strcmp(profile, p) == 0) return v;
9532     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_D3D, 3);
9533     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_BYTECODE, 3);
9534     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_GLSL, 3);
9535     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_GLSL120, 3);
9536     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_ARB1, 2);
9537     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV2, 2);
9538     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV3, 2);
9539     PROFILE_SHADER_MODEL(MOJOSHADER_PROFILE_NV4, 3);
9540     #undef PROFILE_SHADER_MODEL
9541     return -1;  // unknown profile?
9542 } // MOJOSHADER_maxShaderModel
9543 
9544 // end of mojoshader.c ...
9545 
9546